Annotation of src/usr.bin/sort/sort.c, Revision 1.58
1.58 ! millert 1: /* $OpenBSD: sort.c,v 1.57 2015/04/01 20:24:12 millert Exp $ */
1.1 millert 2:
3: /*-
1.44 millert 4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6: * All rights reserved.
1.1 millert 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
1.44 millert 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1.1 millert 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.44 millert 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1.1 millert 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
1.48 millert 30: #include <sys/resource.h>
1.44 millert 31: #include <sys/stat.h>
1.48 millert 32: #include <sys/sysctl.h>
1.44 millert 33: #include <sys/types.h>
1.1 millert 34:
1.44 millert 35: #include <err.h>
36: #include <errno.h>
37: #include <getopt.h>
38: #include <limits.h>
1.16 ericj 39: #include <locale.h>
1.44 millert 40: #include <md5.h>
41: #include <regex.h>
1.1 millert 42: #include <signal.h>
1.44 millert 43: #include <stdbool.h>
44: #include <stdio.h>
1.1 millert 45: #include <stdlib.h>
46: #include <string.h>
47: #include <unistd.h>
1.44 millert 48: #include <wchar.h>
49: #include <wctype.h>
50:
51: #include "coll.h"
52: #include "file.h"
53: #include "sort.h"
54:
55: #define OPTIONS "bCcdfgHhik:Mmno:RrS:st:T:uVz"
56:
57: static bool need_random;
58: static const char *random_source;
59:
60: MD5_CTX md5_ctx;
61:
62: struct sort_opts sort_opts_vals;
63:
64: bool debug_sort;
65: bool need_hint;
66:
67: static bool gnusort_numeric_compatibility;
68:
69: static struct sort_mods default_sort_mods_object;
70: struct sort_mods * const default_sort_mods = &default_sort_mods_object;
71:
72: static bool print_symbols_on_debug;
73:
74: /*
75: * Arguments from file (when file0-from option is used:
76: */
77: static size_t argc_from_file0 = (size_t)-1;
78: static char **argv_from_file0;
79:
80: /*
81: * Placeholder symbols for options which have no single-character equivalent
82: */
83: enum {
84: SORT_OPT = CHAR_MAX + 1,
85: HELP_OPT,
86: FF_OPT,
87: BS_OPT,
88: VERSION_OPT,
89: DEBUG_OPT,
90: RANDOMSOURCE_OPT,
91: COMPRESSPROGRAM_OPT,
92: QSORT_OPT,
93: HEAPSORT_OPT,
94: RADIXSORT_OPT,
95: MMAP_OPT
96: };
97:
98: #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
99: static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
100:
101: static const struct option long_options[] = {
102: { "batch-size", required_argument, NULL, BS_OPT },
103: { "buffer-size", required_argument, NULL, 'S' },
104: { "check", optional_argument, NULL, 'c' },
105: { "check=silent|quiet", optional_argument, NULL, 'C' },
106: { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
107: { "debug", no_argument, NULL, DEBUG_OPT },
108: { "dictionary-order", no_argument, NULL, 'd' },
109: { "field-separator", required_argument, NULL, 't' },
110: { "files0-from", required_argument, NULL, FF_OPT },
111: { "general-numeric-sort", no_argument, NULL, 'g' },
112: { "heapsort", no_argument, NULL, HEAPSORT_OPT },
113: { "help", no_argument, NULL, HELP_OPT },
114: { "human-numeric-sort", no_argument, NULL, 'h' },
115: { "ignore-leading-blanks", no_argument, NULL, 'b' },
116: { "ignore-case", no_argument, NULL, 'f' },
117: { "ignore-nonprinting", no_argument, NULL, 'i' },
118: { "key", required_argument, NULL, 'k' },
119: { "merge", no_argument, NULL, 'm' },
120: { "mergesort", no_argument, NULL, 'H' },
121: { "mmap", no_argument, NULL, MMAP_OPT },
122: { "month-sort", no_argument, NULL, 'M' },
123: { "numeric-sort", no_argument, NULL, 'n' },
124: { "output", required_argument, NULL, 'o' },
125: { "qsort", no_argument, NULL, QSORT_OPT },
126: { "radixsort", no_argument, NULL, RADIXSORT_OPT },
127: { "random-sort", no_argument, NULL, 'R' },
128: { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
129: { "reverse", no_argument, NULL, 'r' },
130: { "sort", required_argument, NULL, SORT_OPT },
131: { "stable", no_argument, NULL, 's' },
132: { "temporary-directory", required_argument, NULL, 'T' },
133: { "unique", no_argument, NULL, 'u' },
134: { "version", no_argument, NULL, VERSION_OPT },
135: { "version-sort", no_argument, NULL, 'V' },
136: { "zero-terminated", no_argument, NULL, 'z' },
137: { NULL, no_argument, NULL, 0 }
138: };
139:
140: /*
141: * Check where sort modifier is present
142: */
143: static bool
144: sort_modifier_empty(struct sort_mods *sm)
145: {
146:
147: if (sm == NULL)
148: return true;
149: return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
150: sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag);
151: }
152:
153: /*
154: * Print out usage text.
155: */
156: static __dead void
157: usage(int exit_val)
158: {
1.1 millert 159:
1.44 millert 160: fprintf(exit_val ? stderr : stdout,
1.46 jmc 161: "usage: %s [-bCcdfgHhiMmnRrsuVz] [-k field1[,field2]] [-o output] "
1.45 jmc 162: "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname());
1.44 millert 163: exit(exit_val);
164: }
1.4 millert 165:
1.1 millert 166: /*
1.44 millert 167: * Read input file names from a file (file0-from option).
1.1 millert 168: */
1.44 millert 169: static void
170: read_fns_from_file0(const char *fn)
171: {
1.47 millert 172: FILE *f;
173: char *line = NULL;
174: size_t linesize = 0;
175: ssize_t linelen;
176:
177: if (fn == NULL)
178: return;
179:
180: f = fopen(fn, "r");
181: if (f == NULL)
182: err(2, "%s", fn);
183:
184: while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
185: if (*line != '\0') {
186: if (argc_from_file0 == (size_t)-1)
187: argc_from_file0 = 0;
188: ++argc_from_file0;
189: argv_from_file0 = sort_reallocarray(argv_from_file0,
190: argc_from_file0, sizeof(char *));
191: argv_from_file0[argc_from_file0 - 1] = line;
192: } else {
193: free(line);
1.44 millert 194: }
1.47 millert 195: line = NULL;
196: linesize = 0;
1.44 millert 197: }
1.47 millert 198: if (ferror(f))
199: err(2, "%s: getdelim", fn);
200:
201: closefile(f, fn);
1.44 millert 202: }
1.4 millert 203:
1.1 millert 204: /*
1.44 millert 205: * Check how much RAM is available for the sort.
1.1 millert 206: */
1.44 millert 207: static void
208: set_hw_params(void)
209: {
1.48 millert 210: long long user_memory;
211: struct rlimit rl;
212: size_t len;
213: int mib[] = { CTL_HW, HW_USERMEM64 };
214:
215: /* Get total user (non-kernel) memory. */
216: len = sizeof(user_memory);
217: if (sysctl(mib, 2, &user_memory, &len, NULL, 0) == -1)
218: user_memory = -1;
219:
220: /* Increase our data size to the max */
221: if (getrlimit(RLIMIT_DATA, &rl) == 0) {
222: free_memory = (unsigned long long)rl.rlim_cur;
223: rl.rlim_cur = rl.rlim_max;
224: if (setrlimit(RLIMIT_DATA, &rl) == 0) {
225: free_memory = (unsigned long long)rl.rlim_max;
226: } else {
227: warn("Can't set resource limit to max data size");
228: }
229: } else
230: warn("Can't get resource limit for data size");
1.1 millert 231:
1.48 millert 232: /* We prefer to use temp files rather than swap space. */
233: if (user_memory != -1 && free_memory > user_memory)
234: free_memory = user_memory;
1.44 millert 235:
236: available_free_memory = free_memory / 2;
237: }
238:
/*
 * Convert the first multibyte character of c into the wide character *wc.
 *
 * When the conversion fails or c starts with a NUL byte (mbtowc() returns
 * less than 1), *wc is set to def instead.  Nothing happens if either
 * pointer is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{
	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
1.12 millert 254:
1.44 millert 255: /*
256: * Set current locale symbols.
257: */
258: static void
259: set_locale(void)
1.1 millert 260: {
1.44 millert 261: struct lconv *lc;
262: const char *locale;
1.4 millert 263:
1.16 ericj 264: setlocale(LC_ALL, "");
265:
1.44 millert 266: lc = localeconv();
267:
268: if (lc) {
269: /* obtain LC_NUMERIC info */
270: /* Convert to wide char form */
271: conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
272: symbol_decimal_point);
273: conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
274: symbol_thousands_sep);
275: conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
276: symbol_positive_sign);
277: conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
278: symbol_negative_sign);
279: }
280:
281: if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
282: gnusort_numeric_compatibility = true;
283:
284: locale = setlocale(LC_COLLATE, NULL);
285: if (locale != NULL) {
286: char *tmpl;
287: const char *byteclocale;
288:
289: tmpl = sort_strdup(locale);
290: byteclocale = setlocale(LC_COLLATE, "C");
291: if (byteclocale && strcmp(byteclocale, tmpl) == 0) {
292: byte_sort = true;
293: } else {
294: byteclocale = setlocale(LC_COLLATE, "POSIX");
295: if (byteclocale && strcmp(byteclocale, tmpl) == 0)
296: byte_sort = true;
297: else
298: setlocale(LC_COLLATE, tmpl);
299: }
300: sort_free(tmpl);
301: }
302: if (!byte_sort)
303: sort_mb_cur_max = MB_CUR_MAX;
304: }
305:
306: /*
307: * Set directory temporary files.
308: */
309: static void
310: set_tmpdir(void)
311: {
1.53 millert 312: if (!issetugid()) {
313: char *td;
1.44 millert 314:
1.53 millert 315: td = getenv("TMPDIR");
316: if (td != NULL)
317: tmpdir = sort_strdup(td);
318: }
1.44 millert 319: }
320:
321: /*
322: * Parse -S option.
323: */
324: static unsigned long long
325: parse_memory_buffer_value(const char *value)
326: {
327:
328: if (value == NULL)
329: return available_free_memory;
330: else {
331: char *endptr;
332: unsigned long long membuf;
333:
334: membuf = strtoll(value, &endptr, 10);
1.55 millert 335: if (endptr == value || (long long)membuf < 0 ||
336: (errno == ERANGE && membuf == LONG_MAX))
337: errx(2, "invalid memory buffer size: %s", value);
338:
339: switch (*endptr) {
340: case 'Y':
341: membuf *= 1024;
342: /* FALLTHROUGH */
343: case 'Z':
344: membuf *= 1024;
345: /* FALLTHROUGH */
346: case 'E':
347: membuf *= 1024;
348: /* FALLTHROUGH */
349: case 'P':
350: membuf *= 1024;
351: /* FALLTHROUGH */
352: case 'T':
353: membuf *= 1024;
354: /* FALLTHROUGH */
355: case 'G':
356: membuf *= 1024;
357: /* FALLTHROUGH */
358: case 'M':
359: membuf *= 1024;
360: /* FALLTHROUGH */
361: case '\0':
362: case 'K':
363: membuf *= 1024;
364: /* FALLTHROUGH */
365: case 'b':
366: break;
367: case '%':
368: membuf = (available_free_memory * membuf) /
369: 100;
370: break;
371: default:
372: warnc(EINVAL, "%s", optarg);
1.44 millert 373: membuf = available_free_memory;
374: }
375: return membuf;
376: }
377: }
378:
379: /*
380: * Signal handler that clears the temporary files.
381: */
382: static void
1.49 millert 383: sig_handler(int sig __unused)
1.44 millert 384: {
385:
386: clear_tmp_files();
1.50 millert 387: _exit(2);
1.44 millert 388: }
389:
390: /*
391: * Set signal handler on panic signals.
392: */
393: static void
394: set_signal_handler(void)
395: {
396: struct sigaction sa;
1.49 millert 397: int i, signals[] = {SIGTERM, SIGHUP, SIGINT, SIGQUIT, SIGUSR1, SIGUSR2,
398: SIGPIPE, SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0};
1.44 millert 399:
400: memset(&sa, 0, sizeof(sa));
1.49 millert 401: sigfillset(&sa.sa_mask);
402: sa.sa_flags = SA_RESTART;
403: sa.sa_handler = sig_handler;
404:
405: for (i = 0; signals[i] != 0; i++) {
406: if (sigaction(signals[i], &sa, NULL) < 0) {
407: warn("sigaction(%d)", i);
408: continue;
409: }
1.44 millert 410: }
411: }
412:
/*
 * Complain about an unsupported feature name and exit with status 2.
 */
static void
unknown(const char *what)
{
	errx(2, "Unknown feature: %s", what);
}
422:
423: /*
424: * Check whether contradictory input options are used.
425: */
426: static void
427: check_mutually_exclusive_flags(char c, bool *mef_flags)
428: {
429: int i, fo_index, mec;
430: bool found_others, found_this;
431:
432: found_others = found_this =false;
433: fo_index = 0;
434:
435: for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
436: mec = mutually_exclusive_flags[i];
437:
438: if (mec != c) {
439: if (mef_flags[i]) {
440: if (found_this)
441: errx(1, "%c:%c: mutually exclusive flags", c, mec);
442: found_others = true;
443: fo_index = i;
444: }
445: } else {
446: if (found_others)
447: errx(1, "%c:%c: mutually exclusive flags", c, mutually_exclusive_flags[fo_index]);
448: mef_flags[i] = true;
449: found_this = true;
450: }
451: }
452: }
453:
454: /*
455: * Initialise sort opts data.
456: */
457: static void
458: set_sort_opts(void)
459: {
460:
461: memset(&default_sort_mods_object, 0,
462: sizeof(default_sort_mods_object));
463: memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
464: default_sort_mods_object.func =
465: get_sort_func(&default_sort_mods_object);
466: }
467:
468: /*
469: * Set a sort modifier on a sort modifiers object.
470: */
471: static bool
472: set_sort_modifier(struct sort_mods *sm, int c)
473: {
474:
475: if (sm) {
476: switch (c){
477: case 'b':
478: sm->bflag = true;
1.1 millert 479: break;
480: case 'd':
1.44 millert 481: sm->dflag = true;
482: break;
1.4 millert 483: case 'f':
1.44 millert 484: sm->fflag = true;
485: break;
486: case 'g':
487: sm->gflag = true;
488: need_hint = true;
489: break;
1.1 millert 490: case 'i':
1.44 millert 491: sm->iflag = true;
1.1 millert 492: break;
493: case 'R':
1.44 millert 494: sm->Rflag = true;
495: need_random = true;
1.1 millert 496: break;
1.44 millert 497: case 'M':
498: initialise_months();
499: sm->Mflag = true;
500: need_hint = true;
1.1 millert 501: break;
1.44 millert 502: case 'n':
503: sm->nflag = true;
504: need_hint = true;
505: print_symbols_on_debug = true;
1.1 millert 506: break;
1.44 millert 507: case 'r':
508: sm->rflag = true;
1.1 millert 509: break;
1.44 millert 510: case 'V':
511: sm->Vflag = true;
1.1 millert 512: break;
1.44 millert 513: case 'h':
514: sm->hflag = true;
515: need_hint = true;
516: print_symbols_on_debug = true;
1.1 millert 517: break;
1.8 deraadt 518: default:
1.44 millert 519: return false;
1.1 millert 520: }
1.44 millert 521: sort_opts_vals.complex_sort = true;
522: sm->func = get_sort_func(sm);
523: }
524: return true;
525: }
526:
527: /*
528: * Parse POS in -k option.
529: */
530: static int
531: parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
532: {
533: regmatch_t pmatch[4];
534: regex_t re;
535: char *c, *f;
536: const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
537: size_t len, nmatch;
538: int ret;
539:
540: ret = -1;
541: nmatch = 4;
542: c = f = NULL;
543:
544: if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
545: return -1;
546:
547: if (regexec(&re, s, nmatch, pmatch, 0) != 0)
548: goto end;
549:
550: if (pmatch[0].rm_eo <= pmatch[0].rm_so)
551: goto end;
552:
553: if (pmatch[1].rm_eo <= pmatch[1].rm_so)
554: goto end;
555:
556: len = pmatch[1].rm_eo - pmatch[1].rm_so;
557:
1.57 millert 558: f = sort_malloc(len + 1);
559: memcpy(f, s + pmatch[1].rm_so, len);
1.44 millert 560: f[len] = '\0';
561:
562: if (second) {
563: errno = 0;
1.58 ! millert 564: ks->f2 = (size_t)strtoul(f, NULL, 10);
1.44 millert 565: if (errno != 0)
1.58 ! millert 566: goto end;
1.44 millert 567: if (ks->f2 == 0) {
568: warn("0 field in key specs");
569: goto end;
570: }
571: } else {
572: errno = 0;
1.58 ! millert 573: ks->f1 = (size_t)strtoul(f, NULL, 10);
1.44 millert 574: if (errno != 0)
1.58 ! millert 575: goto end;
1.44 millert 576: if (ks->f1 == 0) {
577: warn("0 field in key specs");
578: goto end;
579: }
580: }
581:
582: if (pmatch[2].rm_eo > pmatch[2].rm_so) {
583: len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
584:
1.57 millert 585: c = sort_malloc(len + 1);
586: memcpy(c, s + pmatch[2].rm_so + 1, len);
1.44 millert 587: c[len] = '\0';
588:
589: if (second) {
590: errno = 0;
1.58 ! millert 591: ks->c2 = (size_t)strtoul(c, NULL, 10);
1.44 millert 592: if (errno != 0)
1.58 ! millert 593: goto end;
1.44 millert 594: } else {
595: errno = 0;
1.58 ! millert 596: ks->c1 = (size_t)strtoul(c, NULL, 10);
1.44 millert 597: if (errno != 0)
1.58 ! millert 598: goto end;
1.44 millert 599: if (ks->c1 == 0) {
600: warn("0 column in key specs");
601: goto end;
602: }
603: }
604: } else {
605: if (second)
606: ks->c2 = 0;
607: else
608: ks->c1 = 1;
609: }
610:
611: if (pmatch[3].rm_eo > pmatch[3].rm_so) {
612: regoff_t i = 0;
613:
614: for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
615: check_mutually_exclusive_flags(s[i], mef_flags);
616: if (s[i] == 'b') {
617: if (second)
618: ks->pos2b = true;
619: else
620: ks->pos1b = true;
621: } else if (!set_sort_modifier(&(ks->sm), s[i]))
622: goto end;
623: }
624: }
625:
626: ret = 0;
627:
628: end:
629:
630: if (c)
631: sort_free(c);
632: if (f)
633: sort_free(f);
634: regfree(&re);
635:
636: return ret;
637: }
638:
639: /*
640: * Parse -k option value.
641: */
642: static int
643: parse_k(const char *s, struct key_specs *ks)
644: {
645: int ret = -1;
646: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
647: { false, false, false, false, false, false };
648:
649: if (s && *s) {
650: char *sptr;
651:
652: sptr = strchr(s, ',');
653: if (sptr) {
654: size_t size1;
655: char *pos1, *pos2;
656:
657: size1 = sptr - s;
658:
659: if (size1 < 1)
660: return -1;
661:
1.57 millert 662: pos1 = sort_malloc(size1 + 1);
663: memcpy(pos1, s, size1);
1.44 millert 664: pos1[size1] = '\0';
665:
666: ret = parse_pos(pos1, ks, mef_flags, false);
667:
668: sort_free(pos1);
669: if (ret < 0)
670: return ret;
671:
672: pos2 = sort_strdup(sptr + 1);
673: ret = parse_pos(pos2, ks, mef_flags, true);
674: sort_free(pos2);
675: } else
676: ret = parse_pos(s, ks, mef_flags, false);
1.1 millert 677: }
1.4 millert 678:
1.44 millert 679: return ret;
680: }
681:
/*
 * Parse one POS of the obsolete +POS -POS syntax: field[.column][letters].
 *
 * The field and column numbers are stored in *nf and *nc (both are 0 when
 * absent or when parsing fails) and the trailing letters are copied into
 * sopts, a buffer of sopts_size bytes that is left untouched when no
 * letters are present.
 *
 * Returns 0 on success and -1 when s does not have the expected form.
 * An out-of-range number or letters that do not fit into sopts terminate
 * the program with status 2.
 */
static int
parse_pos_obs(const char *s, size_t *nf, size_t *nc, char *sopts,
    size_t sopts_size)
{
	const char *pattern = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regmatch_t m[4];
	regex_t re;
	char *fieldstr = NULL, *colstr = NULL;
	size_t len;
	int ret = -1;

	*nc = *nf = 0;

	if (regcomp(&re, pattern, REG_EXTENDED) != 0)
		return -1;

	if (regexec(&re, s, 4, m, 0) != 0)
		goto end;
	if (m[0].rm_eo <= m[0].rm_so)
		goto end;
	if (m[1].rm_eo <= m[1].rm_so)
		goto end;

	/* Field number. */
	len = m[1].rm_eo - m[1].rm_so;
	fieldstr = sort_malloc(len + 1);
	memcpy(fieldstr, s + m[1].rm_so, len);
	fieldstr[len] = '\0';

	errno = 0;
	*nf = (size_t) strtoul(fieldstr, NULL, 10);
	if (errno != 0)
		errx(2, "Invalid key position");

	/* Optional column number; skip the leading dot of the match. */
	if (m[2].rm_eo > m[2].rm_so) {
		len = m[2].rm_eo - m[2].rm_so - 1;
		colstr = sort_malloc(len + 1);
		memcpy(colstr, s + m[2].rm_so + 1, len);
		colstr[len] = '\0';

		errno = 0;
		*nc = (size_t) strtoul(colstr, NULL, 10);
		if (errno != 0)
			errx(2, "Invalid key position");
	}

	/* Optional modifier letters, handed back verbatim. */
	if (m[3].rm_eo > m[3].rm_so) {
		len = m[3].rm_eo - m[3].rm_so;
		if (len >= sopts_size)
			errx(2, "Invalid key position");
		memcpy(sopts, s + m[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (colstr)
		sort_free(colstr);
	if (fieldstr)
		sort_free(fieldstr);
	regfree(&re);

	return ret;
}
758:
/*
 * "Translate" obsolete +POS1 [-POS2] syntax into new -kPOS1[,POS2] syntax.
 *
 * argv is rewritten in place: each +POS1 argument is replaced by a newly
 * allocated -k string, and a directly following -POS2 argument is merged
 * into it and removed, which decrements *argc.  Obsolete positions count
 * from zero, so the start field and column are incremented; the end
 * field is incremented only when an end column is given.  Arguments that
 * do not parse as a position are left alone.
 *
 * Translation stops at a "--" argument, because everything after it is
 * an operand (file name) and must not be rewritten.
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Room for "-k", four numbers, two separators per position and
	 * both modifier strings, so the result is never truncated.
	 */
	char sopt[384];
	int i;

	for (i = 1; i < *argc; i++) {
		char sopts1[128], sopts2[128];
		size_t c1, f1, c2, f2;
		char *arg1, *arg2;
		int j, n;

		arg1 = argv[i];

		if (strcmp(arg1, "--") == 0)
			break;
		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;
		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1,
		    sizeof(sopts1)) < 0)
			continue;
		f1 += 1;
		c1 += 1;

		arg2 = (i + 1 < *argc) ? argv[i + 1] : NULL;
		if (arg2 != NULL && strlen(arg2) > 1 && arg2[0] == '-') {
			sopts2[0] = '\0';
			c2 = f2 = 0;

			if (parse_pos_obs(arg2 + 1, &f2, &c2, sopts2,
			    sizeof(sopts2)) >= 0) {
				if (c2 > 0)
					f2 += 1;
				n = snprintf(sopt, sizeof(sopt),
				    "-k%zu.%zu%s,%zu.%zu%s",
				    f1, c1, sopts1, f2, c2, sopts2);
				if (n < 0 || (size_t)n >= sizeof(sopt))
					errx(2, "Invalid key position");
				argv[i] = sort_strdup(sopt);
				/* Drop -POS2 by shifting the tail left. */
				for (j = i + 1; j + 1 < *argc; j++)
					argv[j] = argv[j + 1];
				*argc -= 1;
				continue;
			}
		}

		n = snprintf(sopt, sizeof(sopt), "-k%zu.%zu%s",
		    f1, c1, sopts1);
		if (n < 0 || (size_t)n >= sizeof(sopt))
			errx(2, "Invalid key position");
		argv[i] = sort_strdup(sopt);
	}
}
823:
824: /*
825: * Set random seed
826: */
827: static void
828: set_random_seed(void)
829: {
830: if (!need_random)
831: return;
1.4 millert 832:
1.44 millert 833: MD5Init(&md5_ctx);
834: if (random_source != NULL) {
835: unsigned char buf[BUFSIZ];
836: size_t nr;
837: FILE *fp;
838:
839: if ((fp = fopen(random_source, "r")) == NULL)
840: err(2, "%s", random_source);
841: while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0)
842: MD5Update(&md5_ctx, buf, nr);
843: if (ferror(fp))
844: err(2, "%s", random_source);
845: fclose(fp);
1.1 millert 846: } else {
1.44 millert 847: unsigned char rsd[1024];
848:
849: arc4random_buf(rsd, sizeof(rsd));
850: MD5Update(&md5_ctx, rsd, sizeof(rsd));
851: }
852: }
853:
854: /*
855: * Main function.
856: */
857: int
858: main(int argc, char *argv[])
859: {
860: char *outfile, *real_outfile;
861: int c, result;
862: size_t i;
863: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
864: { false, false, false, false, false, false };
865:
866: result = 0;
1.51 millert 867: outfile = "-";
1.44 millert 868: real_outfile = NULL;
869:
870: struct sort_mods *sm = &default_sort_mods_object;
871:
872: init_tmp_files();
873:
874: set_signal_handler();
875:
1.51 millert 876: atexit(clear_tmp_files);
877:
1.44 millert 878: set_hw_params();
879: set_locale();
880: set_tmpdir();
881: set_sort_opts();
882:
883: fix_obsolete_keys(&argc, argv);
884:
885: while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
886: != -1)) {
887:
888: check_mutually_exclusive_flags(c, mef_flags);
889:
890: if (!set_sort_modifier(sm, c)) {
891:
892: switch (c) {
893: case 'c':
894: sort_opts_vals.cflag = true;
895: if (optarg) {
896: if (!strcmp(optarg, "diagnose-first"))
897: ;
898: else if (!strcmp(optarg, "silent") ||
899: !strcmp(optarg, "quiet"))
900: sort_opts_vals.csilentflag = true;
901: else if (*optarg)
902: unknown(optarg);
903: }
904: break;
905: case 'C':
906: sort_opts_vals.cflag = true;
907: sort_opts_vals.csilentflag = true;
908: break;
909: case 'k':
910: {
911: sort_opts_vals.complex_sort = true;
912: sort_opts_vals.kflag = true;
913:
914: keys_num++;
915: keys = sort_reallocarray(keys, keys_num,
916: sizeof(struct key_specs));
917: memset(&(keys[keys_num - 1]), 0,
918: sizeof(struct key_specs));
919:
920: if (parse_k(optarg, &(keys[keys_num - 1]))
921: < 0) {
922: errc(2, EINVAL, "-k %s", optarg);
923: }
924:
925: break;
926: }
927: case 'm':
928: sort_opts_vals.mflag = true;
929: break;
930: case 'o':
1.51 millert 931: outfile = optarg;
1.44 millert 932: break;
933: case 's':
934: sort_opts_vals.sflag = true;
935: break;
936: case 'S':
937: available_free_memory =
938: parse_memory_buffer_value(optarg);
939: break;
940: case 'T':
941: tmpdir = sort_strdup(optarg);
942: break;
943: case 't':
944: while (strlen(optarg) > 1) {
945: if (optarg[0] != '\\') {
946: errc(2, EINVAL, "%s", optarg);
947: }
948: optarg += 1;
949: if (*optarg == '0') {
950: *optarg = 0;
951: break;
952: }
953: }
954: sort_opts_vals.tflag = true;
955: sort_opts_vals.field_sep = btowc(optarg[0]);
956: if (sort_opts_vals.field_sep == WEOF) {
957: errno = EINVAL;
958: err(2, NULL);
959: }
960: if (!gnusort_numeric_compatibility) {
961: if (symbol_decimal_point == sort_opts_vals.field_sep)
962: symbol_decimal_point = WEOF;
963: if (symbol_thousands_sep == sort_opts_vals.field_sep)
964: symbol_thousands_sep = WEOF;
965: if (symbol_negative_sign == sort_opts_vals.field_sep)
966: symbol_negative_sign = WEOF;
967: if (symbol_positive_sign == sort_opts_vals.field_sep)
968: symbol_positive_sign = WEOF;
969: }
970: break;
971: case 'u':
972: sort_opts_vals.uflag = true;
973: /* stable sort for the correct unique val */
974: sort_opts_vals.sflag = true;
975: break;
976: case 'z':
977: sort_opts_vals.zflag = true;
978: break;
979: case SORT_OPT:
980: if (optarg) {
981: if (!strcmp(optarg, "general-numeric"))
982: set_sort_modifier(sm, 'g');
983: else if (!strcmp(optarg, "human-numeric"))
984: set_sort_modifier(sm, 'h');
985: else if (!strcmp(optarg, "numeric"))
986: set_sort_modifier(sm, 'n');
987: else if (!strcmp(optarg, "month"))
988: set_sort_modifier(sm, 'M');
989: else if (!strcmp(optarg, "random"))
990: set_sort_modifier(sm, 'R');
991: else
992: unknown(optarg);
993: }
994: break;
995: case QSORT_OPT:
996: sort_opts_vals.sort_method = SORT_QSORT;
997: break;
998: case 'H':
999: sort_opts_vals.sort_method = SORT_MERGESORT;
1000: break;
1001: case MMAP_OPT:
1002: use_mmap = true;
1003: break;
1004: case HEAPSORT_OPT:
1005: sort_opts_vals.sort_method = SORT_HEAPSORT;
1006: break;
1007: case RADIXSORT_OPT:
1008: sort_opts_vals.sort_method = SORT_RADIXSORT;
1009: break;
1010: case RANDOMSOURCE_OPT:
1011: random_source = strdup(optarg);
1012: break;
1013: case COMPRESSPROGRAM_OPT:
1014: compress_program = strdup(optarg);
1015: break;
1016: case FF_OPT:
1017: read_fns_from_file0(optarg);
1018: break;
1019: case BS_OPT:
1020: {
1.54 millert 1021: const char *errstr;
1022:
1023: max_open_files = strtonum(optarg, 2,
1024: UINT_MAX - 1, &errstr) + 1;
1025: if (errstr != NULL)
1026: errx(2, "--batch-size argument is %s",
1027: errstr);
1028: break;
1.44 millert 1029: }
1030: case VERSION_OPT:
1031: printf("%s\n", VERSION);
1032: exit(EXIT_SUCCESS);
1033: /* NOTREACHED */
1034: break;
1035: case DEBUG_OPT:
1036: debug_sort = true;
1037: break;
1038: case HELP_OPT:
1039: usage(0);
1040: /* NOTREACHED */
1041: break;
1042: default:
1043: usage(2);
1044: /* NOTREACHED */
1045: }
1046: }
1047: }
1048:
1049: argc -= optind;
1050: argv += optind;
1051:
1052: if (keys_num == 0) {
1053: keys_num = 1;
1054: keys = sort_realloc(keys, sizeof(struct key_specs));
1055: memset(&(keys[0]), 0, sizeof(struct key_specs));
1056: keys[0].c1 = 1;
1057: keys[0].pos1b = default_sort_mods->bflag;
1058: keys[0].pos2b = default_sort_mods->bflag;
1059: memcpy(&(keys[0].sm), default_sort_mods,
1060: sizeof(struct sort_mods));
1061: }
1062:
1063: for (i = 0; i < keys_num; i++) {
1064: struct key_specs *ks;
1065:
1066: ks = &(keys[i]);
1067:
1068: if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1069: !(ks->pos2b)) {
1070: ks->pos1b = sm->bflag;
1071: ks->pos2b = sm->bflag;
1072: memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1073: }
1074:
1075: ks->sm.func = get_sort_func(&(ks->sm));
1076: }
1077:
1078: if (argv_from_file0) {
1079: argc = argc_from_file0;
1080: argv = argv_from_file0;
1081: }
1082:
1083: if (debug_sort) {
1084: printf("Memory to be used for sorting: %llu\n",
1085: available_free_memory);
1086: printf("Using collate rules of %s locale\n",
1087: setlocale(LC_COLLATE, NULL));
1088: if (byte_sort)
1089: printf("Byte sort is used\n");
1090: if (print_symbols_on_debug) {
1091: printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1092: if (symbol_thousands_sep)
1093: printf("Thousands separator: <%lc>\n",
1094: symbol_thousands_sep);
1095: printf("Positive sign: <%lc>\n", symbol_positive_sign);
1096: printf("Negative sign: <%lc>\n", symbol_negative_sign);
1097: }
1098: }
1099:
1100: set_random_seed();
1.4 millert 1101:
1.44 millert 1102: /* Case when the outfile equals one of the input files: */
1.51 millert 1103: if (strcmp(outfile, "-") != 0) {
1104: struct stat sb;
1105: int fd, i;
1.44 millert 1106:
1107: for (i = 0; i < argc; ++i) {
1108: if (strcmp(argv[i], outfile) == 0) {
1.51 millert 1109: if (stat(outfile, &sb) == -1)
1110: err(2, "%s", outfile);
1111: if (access(outfile, W_OK) == -1)
1112: err(2, "%s", outfile);
1113: real_outfile = outfile;
1114: sort_asprintf(&outfile, "%s.XXXXXXXXXX",
1115: real_outfile);
1116: if ((fd = mkstemp(outfile)) == -1 ||
1117: fchmod(fd, sb.st_mode & ALLPERMS) == -1)
1118: err(2, "%s", outfile);
1119: close(fd);
1.44 millert 1120: tmp_file_atexit(outfile);
1.51 millert 1121: break;
1.44 millert 1122: }
1123: }
1124: }
1125:
1126: if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1127: struct file_list fl;
1128: struct sort_list list;
1129:
1130: sort_list_init(&list);
1131: file_list_init(&fl, true);
1132:
1133: if (argc < 1)
1134: procfile("-", &list, &fl);
1135: else {
1136: while (argc > 0) {
1137: procfile(*argv, &list, &fl);
1138: --argc;
1139: ++argv;
1140: }
1141: }
1142:
1143: if (fl.count < 1)
1144: sort_list_to_file(&list, outfile);
1145: else {
1146: if (list.count > 0) {
1147: char *flast = new_tmp_file_name();
1148:
1149: sort_list_to_file(&list, flast);
1150: file_list_add(&fl, flast, false);
1151: }
1152: merge_files(&fl, outfile);
1153: }
1154:
1155: file_list_clean(&fl);
1156:
1157: /*
1158: * We are about to exit the program, so we can ignore
1159: * the clean-up for speed
1160: *
1161: * sort_list_clean(&list);
1162: */
1163:
1164: } else if (sort_opts_vals.cflag) {
1165: result = (argc == 0) ? (check("-")) : (check(*argv));
1166: } else if (sort_opts_vals.mflag) {
1167: struct file_list fl;
1168:
1169: file_list_init(&fl, false);
1170: file_list_populate(&fl, argc, argv, true);
1171: merge_files(&fl, outfile);
1172: file_list_clean(&fl);
1173: }
1174:
1175: if (real_outfile) {
1176: if (rename(outfile, real_outfile) < 0)
1177: err(2, "%s", real_outfile);
1.51 millert 1178: sort_free(outfile);
1.44 millert 1179: }
1.4 millert 1180:
1.44 millert 1181: return result;
1.1 millert 1182: }