Annotation of src/usr.bin/sort/sort.c, Revision 1.65
1.65 ! millert 1: /* $OpenBSD: sort.c,v 1.64 2015/04/01 21:37:47 millert Exp $ */
1.1 millert 2:
3: /*-
1.44 millert 4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6: * All rights reserved.
1.1 millert 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
1.44 millert 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1.1 millert 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.44 millert 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1.1 millert 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
1.48 millert 30: #include <sys/resource.h>
1.44 millert 31: #include <sys/stat.h>
1.48 millert 32: #include <sys/sysctl.h>
1.44 millert 33: #include <sys/types.h>
1.1 millert 34:
1.44 millert 35: #include <err.h>
36: #include <errno.h>
37: #include <getopt.h>
38: #include <limits.h>
1.16 ericj 39: #include <locale.h>
1.44 millert 40: #include <md5.h>
41: #include <regex.h>
1.1 millert 42: #include <signal.h>
1.44 millert 43: #include <stdbool.h>
44: #include <stdio.h>
1.1 millert 45: #include <stdlib.h>
46: #include <string.h>
47: #include <unistd.h>
1.44 millert 48: #include <wchar.h>
49: #include <wctype.h>
50:
51: #include "coll.h"
52: #include "file.h"
53: #include "sort.h"
54:
/* Short option string handed to getopt_long() in main(). */
#define OPTIONS "bCcdfgHhik:Mmno:RrS:st:T:uVz"

/* Set by the 'R' modifier; set_random_seed() does nothing unless it is set. */
static bool need_random;
/* File named by --random-source, or NULL to seed from arc4random_buf(). */
static const char *random_source;

/* MD5 context initialised and fed by set_random_seed(). */
MD5_CTX md5_ctx;

/* Global (non per-key) options; zeroed by set_sort_opts(). */
struct sort_opts sort_opts_vals;

/* Set by --debug; makes main() print diagnostic information. */
bool debug_sort;
/* Set by the g, M, n and h modifiers.  NOTE(review): read outside this file. */
bool need_hint;

/* Set when GNUSORT_NUMERIC_COMPATIBILITY is present in the environment. */
static bool gnusort_numeric_compatibility;

/* Modifiers given as plain options rather than attached to a -k key. */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/* Set by the n and h modifiers; makes --debug print the numeric symbols. */
static bool print_symbols_on_debug;

/*
 * Arguments read from a file (when the --files0-from option is used).
 * argc_from_file0 stays at (size_t)-1 until the first name has been read.
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;
79:
/*
 * Placeholder symbols for options which have no single-character
 * equivalent.  Numbering starts at CHAR_MAX + 1, i.e. above every value
 * a plain char option letter can take.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
97:
/*
 * Sort-type modifiers of which at most one may be selected.  The position
 * of a flag in this table is also its slot in the mef_flags arrays passed
 * to check_mutually_exclusive_flags().
 */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
100:
/*
 * Long options accepted by getopt_long() in main().  Entries whose value
 * is a character are handled by the same case as that short option; the
 * remaining entries use the *_OPT placeholder values.  The table is
 * terminated by the all-NULL/0 entry.
 */
static const struct option long_options[] = {
	{ "batch-size", required_argument, NULL, BS_OPT },
	{ "buffer-size", required_argument, NULL, 'S' },
	{ "check", optional_argument, NULL, 'c' },
	/*
	 * NOTE(review): this registers the literal name "check=silent|quiet";
	 * it looks like documentation for -C rather than something a user
	 * would type -- confirm intent.
	 */
	{ "check=silent|quiet", optional_argument, NULL, 'C' },
	{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
	{ "debug", no_argument, NULL, DEBUG_OPT },
	{ "dictionary-order", no_argument, NULL, 'd' },
	{ "field-separator", required_argument, NULL, 't' },
	{ "files0-from", required_argument, NULL, FF_OPT },
	{ "general-numeric-sort", no_argument, NULL, 'g' },
	{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
	{ "help", no_argument, NULL, HELP_OPT },
	{ "human-numeric-sort", no_argument, NULL, 'h' },
	{ "ignore-leading-blanks", no_argument, NULL, 'b' },
	{ "ignore-case", no_argument, NULL, 'f' },
	{ "ignore-nonprinting", no_argument, NULL, 'i' },
	{ "key", required_argument, NULL, 'k' },
	{ "merge", no_argument, NULL, 'm' },
	{ "mergesort", no_argument, NULL, 'H' },
	{ "mmap", no_argument, NULL, MMAP_OPT },
	{ "month-sort", no_argument, NULL, 'M' },
	{ "numeric-sort", no_argument, NULL, 'n' },
	{ "output", required_argument, NULL, 'o' },
	{ "qsort", no_argument, NULL, QSORT_OPT },
	{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
	{ "random-sort", no_argument, NULL, 'R' },
	{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
	{ "reverse", no_argument, NULL, 'r' },
	{ "sort", required_argument, NULL, SORT_OPT },
	{ "stable", no_argument, NULL, 's' },
	{ "temporary-directory", required_argument, NULL, 'T' },
	{ "unique", no_argument, NULL, 'u' },
	{ "version", no_argument, NULL, VERSION_OPT },
	{ "version-sort", no_argument, NULL, 'V' },
	{ "zero-terminated", no_argument, NULL, 'z' },
	{ NULL, no_argument, NULL, 0 }
};
139:
140: /*
141: * Check where sort modifier is present
142: */
143: static bool
144: sort_modifier_empty(struct sort_mods *sm)
145: {
146: if (sm == NULL)
147: return true;
148: return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
149: sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag);
150: }
151:
152: /*
153: * Print out usage text.
154: */
155: static __dead void
156: usage(int exit_val)
157: {
158: fprintf(exit_val ? stderr : stdout,
1.46 jmc 159: "usage: %s [-bCcdfgHhiMmnRrsuVz] [-k field1[,field2]] [-o output] "
1.45 jmc 160: "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname());
1.44 millert 161: exit(exit_val);
162: }
1.4 millert 163:
1.1 millert 164: /*
1.44 millert 165: * Read input file names from a file (file0-from option).
1.1 millert 166: */
1.44 millert 167: static void
168: read_fns_from_file0(const char *fn)
169: {
1.47 millert 170: FILE *f;
171: char *line = NULL;
172: size_t linesize = 0;
173: ssize_t linelen;
174:
175: if (fn == NULL)
176: return;
177:
178: f = fopen(fn, "r");
179: if (f == NULL)
180: err(2, "%s", fn);
181:
182: while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
183: if (*line != '\0') {
184: if (argc_from_file0 == (size_t)-1)
185: argc_from_file0 = 0;
186: ++argc_from_file0;
187: argv_from_file0 = sort_reallocarray(argv_from_file0,
188: argc_from_file0, sizeof(char *));
189: argv_from_file0[argc_from_file0 - 1] = line;
190: } else {
191: free(line);
1.44 millert 192: }
1.47 millert 193: line = NULL;
194: linesize = 0;
1.44 millert 195: }
1.47 millert 196: if (ferror(f))
197: err(2, "%s: getdelim", fn);
198:
199: closefile(f, fn);
1.44 millert 200: }
1.4 millert 201:
/*
 * Work out how much memory the sort may use.
 *
 * free_memory is taken from the RLIMIT_DATA limit (after trying to raise
 * the soft limit to the hard limit) and capped at the amount of user
 * memory reported by sysctl, when that is available.  Half of the result
 * is stored in available_free_memory.
 */
static void
set_hw_params(void)
{
	int mib[] = { CTL_HW, HW_USERMEM64 };
	struct rlimit rl;
	long long user_memory;
	size_t len = sizeof(user_memory);

	/* Total user (non-kernel) memory; -1 means "unknown". */
	if (sysctl(mib, 2, &user_memory, &len, NULL, 0) == -1)
		user_memory = -1;

	/* Raise the data size limit as far as we are allowed to. */
	if (getrlimit(RLIMIT_DATA, &rl) != 0) {
		warn("Can't get resource limit for data size");
	} else {
		free_memory = (unsigned long long)rl.rlim_cur;
		rl.rlim_cur = rl.rlim_max;
		if (setrlimit(RLIMIT_DATA, &rl) != 0)
			warn("Can't set resource limit to max data size");
		else
			free_memory = (unsigned long long)rl.rlim_max;
	}

	/* Temporary files are preferred over swap space. */
	if (user_memory != -1 && free_memory > user_memory)
		free_memory = user_memory;

	available_free_memory = free_memory / 2;
}
236:
/*
 * Convert the first multibyte character of c into the wide character *wc.
 *
 * When the conversion fails or yields the NUL character (mbtowc() result
 * below 1), *wc is set to def instead.  Nothing happens when either wc or
 * c is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{
	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
1.12 millert 251:
1.44 millert 252: /*
253: * Set current locale symbols.
254: */
255: static void
256: set_locale(void)
1.1 millert 257: {
1.44 millert 258: struct lconv *lc;
259: const char *locale;
1.4 millert 260:
1.16 ericj 261: setlocale(LC_ALL, "");
262:
1.44 millert 263: lc = localeconv();
264:
265: if (lc) {
266: /* obtain LC_NUMERIC info */
267: /* Convert to wide char form */
268: conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
269: symbol_decimal_point);
270: conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
271: symbol_thousands_sep);
272: conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
273: symbol_positive_sign);
274: conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
275: symbol_negative_sign);
276: }
277:
278: if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
279: gnusort_numeric_compatibility = true;
280:
281: locale = setlocale(LC_COLLATE, NULL);
282: if (locale != NULL) {
283: char *tmpl;
284: const char *byteclocale;
285:
286: tmpl = sort_strdup(locale);
287: byteclocale = setlocale(LC_COLLATE, "C");
288: if (byteclocale && strcmp(byteclocale, tmpl) == 0) {
289: byte_sort = true;
290: } else {
291: byteclocale = setlocale(LC_COLLATE, "POSIX");
292: if (byteclocale && strcmp(byteclocale, tmpl) == 0)
293: byte_sort = true;
294: else
295: setlocale(LC_COLLATE, tmpl);
296: }
297: sort_free(tmpl);
298: }
299: if (!byte_sort)
300: sort_mb_cur_max = MB_CUR_MAX;
301: }
302:
303: /*
304: * Set directory temporary files.
305: */
306: static void
307: set_tmpdir(void)
308: {
1.53 millert 309: if (!issetugid()) {
310: char *td;
1.44 millert 311:
1.53 millert 312: td = getenv("TMPDIR");
313: if (td != NULL)
314: tmpdir = sort_strdup(td);
315: }
1.44 millert 316: }
317:
318: /*
319: * Parse -S option.
320: */
321: static unsigned long long
322: parse_memory_buffer_value(const char *value)
323: {
324: if (value == NULL)
325: return available_free_memory;
326: else {
327: char *endptr;
328: unsigned long long membuf;
329:
330: membuf = strtoll(value, &endptr, 10);
1.55 millert 331: if (endptr == value || (long long)membuf < 0 ||
1.64 millert 332: (errno == ERANGE && membuf == LLONG_MAX))
1.55 millert 333: errx(2, "invalid memory buffer size: %s", value);
334:
335: switch (*endptr) {
336: case 'Y':
337: membuf *= 1024;
338: /* FALLTHROUGH */
339: case 'Z':
340: membuf *= 1024;
341: /* FALLTHROUGH */
342: case 'E':
343: membuf *= 1024;
344: /* FALLTHROUGH */
345: case 'P':
346: membuf *= 1024;
347: /* FALLTHROUGH */
348: case 'T':
349: membuf *= 1024;
350: /* FALLTHROUGH */
351: case 'G':
352: membuf *= 1024;
353: /* FALLTHROUGH */
354: case 'M':
355: membuf *= 1024;
356: /* FALLTHROUGH */
357: case '\0':
358: case 'K':
359: membuf *= 1024;
360: /* FALLTHROUGH */
361: case 'b':
362: break;
363: case '%':
364: membuf = (available_free_memory * membuf) /
365: 100;
366: break;
367: default:
368: warnc(EINVAL, "%s", optarg);
1.44 millert 369: membuf = available_free_memory;
370: }
371: return membuf;
372: }
373: }
374:
375: /*
376: * Signal handler that clears the temporary files.
377: */
378: static void
1.49 millert 379: sig_handler(int sig __unused)
1.44 millert 380: {
381: clear_tmp_files();
1.50 millert 382: _exit(2);
1.44 millert 383: }
384:
385: /*
386: * Set signal handler on panic signals.
387: */
388: static void
389: set_signal_handler(void)
390: {
391: struct sigaction sa;
1.49 millert 392: int i, signals[] = {SIGTERM, SIGHUP, SIGINT, SIGQUIT, SIGUSR1, SIGUSR2,
393: SIGPIPE, SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0};
1.44 millert 394:
395: memset(&sa, 0, sizeof(sa));
1.49 millert 396: sigfillset(&sa.sa_mask);
397: sa.sa_flags = SA_RESTART;
398: sa.sa_handler = sig_handler;
399:
400: for (i = 0; signals[i] != 0; i++) {
401: if (sigaction(signals[i], &sa, NULL) < 0) {
402: warn("sigaction(%d)", i);
403: continue;
404: }
1.44 millert 405: }
406: }
407:
/*
 * Report an unrecognised feature name and terminate with status 2.
 * what is the offending text, echoed in the message.
 */
static void
unknown(const char *what)
{
	errx(2, "Unknown feature: %s", what);
}
416:
417: /*
418: * Check whether contradictory input options are used.
419: */
420: static void
421: check_mutually_exclusive_flags(char c, bool *mef_flags)
422: {
423: int i, fo_index, mec;
424: bool found_others, found_this;
425:
426: found_others = found_this =false;
427: fo_index = 0;
428:
429: for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
430: mec = mutually_exclusive_flags[i];
431:
432: if (mec != c) {
433: if (mef_flags[i]) {
1.65 ! millert 434: if (found_this) {
! 435: errx(2,
! 436: "%c:%c: mutually exclusive flags",
! 437: c, mec);
! 438: }
1.44 millert 439: found_others = true;
440: fo_index = i;
441: }
442: } else {
1.65 ! millert 443: if (found_others) {
! 444: errx(2,
! 445: "%c:%c: mutually exclusive flags",
! 446: c, mutually_exclusive_flags[fo_index]);
! 447: }
1.44 millert 448: mef_flags[i] = true;
449: found_this = true;
450: }
451: }
452: }
453:
454: /*
455: * Initialise sort opts data.
456: */
457: static void
458: set_sort_opts(void)
459: {
460: memset(&default_sort_mods_object, 0,
461: sizeof(default_sort_mods_object));
462: memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
463: default_sort_mods_object.func =
464: get_sort_func(&default_sort_mods_object);
465: }
466:
467: /*
468: * Set a sort modifier on a sort modifiers object.
469: */
470: static bool
471: set_sort_modifier(struct sort_mods *sm, int c)
472: {
473: if (sm) {
474: switch (c){
475: case 'b':
476: sm->bflag = true;
1.1 millert 477: break;
478: case 'd':
1.44 millert 479: sm->dflag = true;
480: break;
1.4 millert 481: case 'f':
1.44 millert 482: sm->fflag = true;
483: break;
484: case 'g':
485: sm->gflag = true;
486: need_hint = true;
487: break;
1.1 millert 488: case 'i':
1.44 millert 489: sm->iflag = true;
1.1 millert 490: break;
491: case 'R':
1.44 millert 492: sm->Rflag = true;
493: need_random = true;
1.1 millert 494: break;
1.44 millert 495: case 'M':
496: initialise_months();
497: sm->Mflag = true;
498: need_hint = true;
1.1 millert 499: break;
1.44 millert 500: case 'n':
501: sm->nflag = true;
502: need_hint = true;
503: print_symbols_on_debug = true;
1.1 millert 504: break;
1.44 millert 505: case 'r':
506: sm->rflag = true;
1.1 millert 507: break;
1.44 millert 508: case 'V':
509: sm->Vflag = true;
1.1 millert 510: break;
1.44 millert 511: case 'h':
512: sm->hflag = true;
513: need_hint = true;
514: print_symbols_on_debug = true;
1.1 millert 515: break;
1.8 deraadt 516: default:
1.44 millert 517: return false;
1.1 millert 518: }
1.44 millert 519: sort_opts_vals.complex_sort = true;
520: sm->func = get_sort_func(sm);
521: }
522: return true;
523: }
524:
525: /*
526: * Parse POS in -k option.
527: */
528: static int
529: parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
530: {
531: regmatch_t pmatch[4];
532: regex_t re;
533: char *c, *f;
534: const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
535: size_t len, nmatch;
536: int ret;
537:
538: ret = -1;
539: nmatch = 4;
540: c = f = NULL;
541:
542: if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
543: return -1;
544:
545: if (regexec(&re, s, nmatch, pmatch, 0) != 0)
546: goto end;
547:
548: if (pmatch[0].rm_eo <= pmatch[0].rm_so)
549: goto end;
550:
551: if (pmatch[1].rm_eo <= pmatch[1].rm_so)
552: goto end;
553:
554: len = pmatch[1].rm_eo - pmatch[1].rm_so;
555:
1.57 millert 556: f = sort_malloc(len + 1);
557: memcpy(f, s + pmatch[1].rm_so, len);
1.44 millert 558: f[len] = '\0';
559:
560: if (second) {
561: errno = 0;
1.58 millert 562: ks->f2 = (size_t)strtoul(f, NULL, 10);
1.44 millert 563: if (errno != 0)
1.58 millert 564: goto end;
1.44 millert 565: if (ks->f2 == 0) {
566: warn("0 field in key specs");
567: goto end;
568: }
569: } else {
570: errno = 0;
1.58 millert 571: ks->f1 = (size_t)strtoul(f, NULL, 10);
1.44 millert 572: if (errno != 0)
1.58 millert 573: goto end;
1.44 millert 574: if (ks->f1 == 0) {
575: warn("0 field in key specs");
576: goto end;
577: }
578: }
579:
580: if (pmatch[2].rm_eo > pmatch[2].rm_so) {
581: len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
582:
1.57 millert 583: c = sort_malloc(len + 1);
584: memcpy(c, s + pmatch[2].rm_so + 1, len);
1.44 millert 585: c[len] = '\0';
586:
587: if (second) {
588: errno = 0;
1.58 millert 589: ks->c2 = (size_t)strtoul(c, NULL, 10);
1.44 millert 590: if (errno != 0)
1.58 millert 591: goto end;
1.44 millert 592: } else {
593: errno = 0;
1.58 millert 594: ks->c1 = (size_t)strtoul(c, NULL, 10);
1.44 millert 595: if (errno != 0)
1.58 millert 596: goto end;
1.44 millert 597: if (ks->c1 == 0) {
598: warn("0 column in key specs");
599: goto end;
600: }
601: }
602: } else {
603: if (second)
604: ks->c2 = 0;
605: else
606: ks->c1 = 1;
607: }
608:
609: if (pmatch[3].rm_eo > pmatch[3].rm_so) {
610: regoff_t i = 0;
611:
612: for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
613: check_mutually_exclusive_flags(s[i], mef_flags);
614: if (s[i] == 'b') {
615: if (second)
616: ks->pos2b = true;
617: else
618: ks->pos1b = true;
619: } else if (!set_sort_modifier(&(ks->sm), s[i]))
620: goto end;
621: }
622: }
623:
624: ret = 0;
625:
626: end:
627:
1.61 millert 628: sort_free(c);
629: sort_free(f);
1.44 millert 630: regfree(&re);
631:
632: return ret;
633: }
634:
635: /*
636: * Parse -k option value.
637: */
638: static int
639: parse_k(const char *s, struct key_specs *ks)
640: {
641: int ret = -1;
642: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
643: { false, false, false, false, false, false };
644:
645: if (s && *s) {
646: char *sptr;
647:
648: sptr = strchr(s, ',');
649: if (sptr) {
650: size_t size1;
651: char *pos1, *pos2;
652:
653: size1 = sptr - s;
654:
655: if (size1 < 1)
656: return -1;
657:
1.57 millert 658: pos1 = sort_malloc(size1 + 1);
659: memcpy(pos1, s, size1);
1.44 millert 660: pos1[size1] = '\0';
661:
662: ret = parse_pos(pos1, ks, mef_flags, false);
663:
664: sort_free(pos1);
665: if (ret < 0)
666: return ret;
667:
668: pos2 = sort_strdup(sptr + 1);
669: ret = parse_pos(pos2, ks, mef_flags, true);
670: sort_free(pos2);
671: } else
672: ret = parse_pos(s, ks, mef_flags, false);
1.1 millert 673: }
1.4 millert 674:
1.44 millert 675: return ret;
676: }
677:
/*
 * Parse one position of the obsolete +POS1 / -POS2 syntax:
 * FIELD[.COLUMN][LETTERS].
 *
 * On success *nf and *nc receive the field and column numbers (0 when the
 * column is absent) and, when letters are present, they are copied into
 * sopts as a NUL-terminated string; sopts is left untouched otherwise.
 *
 * Returns 0 on success and -1 when s does not have the expected form.
 * A number that strtoul() rejects, or letters that do not fit into
 * sopts_size bytes, terminate the program with status 2.
 */
static int
parse_pos_obs(const char *s, size_t *nf, size_t *nc, char *sopts,
    size_t sopts_size)
{
	const char *pattern = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regmatch_t m[4];
	regex_t re;
	char *fieldstr = NULL, *colstr = NULL;
	size_t n;
	int rc = -1;

	*nf = 0;
	*nc = 0;

	if (regcomp(&re, pattern, REG_EXTENDED) != 0)
		return -1;

	if (regexec(&re, s, 4, m, 0) != 0 ||
	    m[0].rm_eo <= m[0].rm_so ||
	    m[1].rm_eo <= m[1].rm_so)
		goto end;

	/* Group 1: field number. */
	n = m[1].rm_eo - m[1].rm_so;
	fieldstr = sort_malloc(n + 1);
	memcpy(fieldstr, s + m[1].rm_so, n);
	fieldstr[n] = '\0';

	errno = 0;
	*nf = (size_t) strtoul(fieldstr, NULL, 10);
	if (errno != 0)
		errx(2, "Invalid key position");

	/* Group 2: ".COLUMN", copied without the dot. */
	if (m[2].rm_eo > m[2].rm_so) {
		n = m[2].rm_eo - m[2].rm_so - 1;
		colstr = sort_malloc(n + 1);
		memcpy(colstr, s + m[2].rm_so + 1, n);
		colstr[n] = '\0';

		errno = 0;
		*nc = (size_t) strtoul(colstr, NULL, 10);
		if (errno != 0)
			errx(2, "Invalid key position");
	}

	/* Group 3: trailing letters, handed back verbatim. */
	if (m[3].rm_eo > m[3].rm_so) {
		n = m[3].rm_eo - m[3].rm_so;
		if (n >= sopts_size)
			errx(2, "Invalid key position");
		memcpy(sopts, s + m[3].rm_so, n);
		sopts[n] = '\0';
	}

	rc = 0;

end:
	sort_free(colstr);
	sort_free(fieldstr);
	regfree(&re);

	return rc;
}
752:
/*
 * "Translate" obsolete +POS1 [-POS2] syntax into new -kPOS1[,POS2] syntax.
 *
 * Every argument that starts with '+' and parses as an obsolete position
 * is replaced by an equivalent "-k..." string.  When the following
 * argument starts with '-' and parses as a position too, it is merged
 * into the same key and removed from argv, with *argc decremented.
 * Arguments that do not parse are left alone.
 *
 * The obsolete field and column numbers are incremented by one for POS1;
 * for POS2 the field number is incremented only when a column is given.
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Large enough for "-k", four size_t numbers, the punctuation and
	 * two 127 character modifier strings.
	 */
	char sopt[512];
	int i, len;

	for (i = 1; i < *argc; i++) {
		const char *arg1 = argv[i];

		if (arg1[0] == '+') {
			size_t c1, f1;
			char sopts1[128];

			sopts1[0] = 0;
			c1 = f1 = 0;

			if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1,
			    sizeof(sopts1)) < 0)
				continue;

			f1 += 1;
			c1 += 1;
			if (i + 1 < *argc) {
				const char *arg2 = argv[i + 1];

				if (arg2[0] == '-') {
					size_t c2, f2;
					char sopts2[128];

					sopts2[0] = 0;
					c2 = f2 = 0;

					if (parse_pos_obs(arg2 + 1, &f2, &c2,
					    sopts2, sizeof(sopts2)) >= 0) {
						int j;
						if (c2 > 0)
							f2 += 1;
						len = snprintf(sopt, sizeof(sopt),
						    "-k%zu.%zu%s,%zu.%zu%s",
						    f1, c1, sopts1, f2,
						    c2, sopts2);
						/* Never pass on a truncated key. */
						if (len < 0 ||
						    (size_t)len >= sizeof(sopt))
							errx(2,
							    "Invalid key position");
						argv[i] = sort_strdup(sopt);
						/* Close the gap left by -POS2. */
						for (j = i + 1; j + 1 < *argc; j++)
							argv[j] = argv[j + 1];
						*argc -= 1;
						/* Keep argv[argc] == NULL. */
						argv[*argc] = NULL;
						continue;
					}
				}
			}
			len = snprintf(sopt, sizeof(sopt), "-k%zu.%zu%s",
			    f1, c1, sopts1);
			if (len < 0 || (size_t)len >= sizeof(sopt))
				errx(2, "Invalid key position");
			argv[i] = sort_strdup(sopt);
		}
	}
}
811:
812: /*
813: * Set random seed
814: */
815: static void
816: set_random_seed(void)
817: {
818: if (!need_random)
819: return;
1.4 millert 820:
1.44 millert 821: MD5Init(&md5_ctx);
822: if (random_source != NULL) {
823: unsigned char buf[BUFSIZ];
824: size_t nr;
825: FILE *fp;
826:
827: if ((fp = fopen(random_source, "r")) == NULL)
828: err(2, "%s", random_source);
829: while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0)
830: MD5Update(&md5_ctx, buf, nr);
831: if (ferror(fp))
832: err(2, "%s", random_source);
833: fclose(fp);
1.1 millert 834: } else {
1.44 millert 835: unsigned char rsd[1024];
836:
837: arc4random_buf(rsd, sizeof(rsd));
838: MD5Update(&md5_ctx, rsd, sizeof(rsd));
839: }
840: }
841:
842: /*
843: * Main function.
844: */
845: int
846: main(int argc, char *argv[])
847: {
848: char *outfile, *real_outfile;
849: int c, result;
850: size_t i;
851: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
852: { false, false, false, false, false, false };
853:
854: result = 0;
1.51 millert 855: outfile = "-";
1.44 millert 856: real_outfile = NULL;
857:
858: struct sort_mods *sm = &default_sort_mods_object;
859:
860: init_tmp_files();
861:
862: set_signal_handler();
863:
1.51 millert 864: atexit(clear_tmp_files);
865:
1.44 millert 866: set_hw_params();
867: set_locale();
868: set_tmpdir();
869: set_sort_opts();
870:
871: fix_obsolete_keys(&argc, argv);
872:
873: while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
874: != -1)) {
875:
876: check_mutually_exclusive_flags(c, mef_flags);
877:
878: if (!set_sort_modifier(sm, c)) {
879:
880: switch (c) {
881: case 'c':
882: sort_opts_vals.cflag = true;
883: if (optarg) {
884: if (!strcmp(optarg, "diagnose-first"))
885: ;
886: else if (!strcmp(optarg, "silent") ||
887: !strcmp(optarg, "quiet"))
888: sort_opts_vals.csilentflag = true;
889: else if (*optarg)
890: unknown(optarg);
891: }
892: break;
893: case 'C':
894: sort_opts_vals.cflag = true;
895: sort_opts_vals.csilentflag = true;
896: break;
897: case 'k':
898: {
899: sort_opts_vals.complex_sort = true;
900: sort_opts_vals.kflag = true;
901:
902: keys_num++;
903: keys = sort_reallocarray(keys, keys_num,
904: sizeof(struct key_specs));
905: memset(&(keys[keys_num - 1]), 0,
906: sizeof(struct key_specs));
907:
908: if (parse_k(optarg, &(keys[keys_num - 1]))
909: < 0) {
910: errc(2, EINVAL, "-k %s", optarg);
911: }
912:
913: break;
914: }
915: case 'm':
916: sort_opts_vals.mflag = true;
917: break;
918: case 'o':
1.51 millert 919: outfile = optarg;
1.44 millert 920: break;
921: case 's':
922: sort_opts_vals.sflag = true;
923: break;
924: case 'S':
925: available_free_memory =
926: parse_memory_buffer_value(optarg);
927: break;
928: case 'T':
929: tmpdir = sort_strdup(optarg);
930: break;
931: case 't':
932: while (strlen(optarg) > 1) {
933: if (optarg[0] != '\\') {
934: errc(2, EINVAL, "%s", optarg);
935: }
936: optarg += 1;
937: if (*optarg == '0') {
938: *optarg = 0;
939: break;
940: }
941: }
942: sort_opts_vals.tflag = true;
943: sort_opts_vals.field_sep = btowc(optarg[0]);
944: if (sort_opts_vals.field_sep == WEOF) {
945: errno = EINVAL;
946: err(2, NULL);
947: }
948: if (!gnusort_numeric_compatibility) {
949: if (symbol_decimal_point == sort_opts_vals.field_sep)
950: symbol_decimal_point = WEOF;
951: if (symbol_thousands_sep == sort_opts_vals.field_sep)
952: symbol_thousands_sep = WEOF;
953: if (symbol_negative_sign == sort_opts_vals.field_sep)
954: symbol_negative_sign = WEOF;
955: if (symbol_positive_sign == sort_opts_vals.field_sep)
956: symbol_positive_sign = WEOF;
957: }
958: break;
959: case 'u':
960: sort_opts_vals.uflag = true;
961: /* stable sort for the correct unique val */
962: sort_opts_vals.sflag = true;
963: break;
964: case 'z':
965: sort_opts_vals.zflag = true;
966: break;
967: case SORT_OPT:
1.62 millert 968: if (!strcmp(optarg, "general-numeric"))
969: set_sort_modifier(sm, 'g');
970: else if (!strcmp(optarg, "human-numeric"))
971: set_sort_modifier(sm, 'h');
972: else if (!strcmp(optarg, "numeric"))
973: set_sort_modifier(sm, 'n');
974: else if (!strcmp(optarg, "month"))
975: set_sort_modifier(sm, 'M');
976: else if (!strcmp(optarg, "random"))
977: set_sort_modifier(sm, 'R');
978: else
979: unknown(optarg);
1.44 millert 980: break;
981: case QSORT_OPT:
982: sort_opts_vals.sort_method = SORT_QSORT;
983: break;
984: case 'H':
985: sort_opts_vals.sort_method = SORT_MERGESORT;
986: break;
987: case MMAP_OPT:
988: use_mmap = true;
989: break;
990: case HEAPSORT_OPT:
991: sort_opts_vals.sort_method = SORT_HEAPSORT;
992: break;
993: case RADIXSORT_OPT:
994: sort_opts_vals.sort_method = SORT_RADIXSORT;
995: break;
996: case RANDOMSOURCE_OPT:
997: random_source = strdup(optarg);
998: break;
999: case COMPRESSPROGRAM_OPT:
1000: compress_program = strdup(optarg);
1001: break;
1002: case FF_OPT:
1003: read_fns_from_file0(optarg);
1004: break;
1005: case BS_OPT:
1006: {
1.54 millert 1007: const char *errstr;
1008:
1009: max_open_files = strtonum(optarg, 2,
1010: UINT_MAX - 1, &errstr) + 1;
1011: if (errstr != NULL)
1012: errx(2, "--batch-size argument is %s",
1013: errstr);
1014: break;
1.44 millert 1015: }
1016: case VERSION_OPT:
1017: printf("%s\n", VERSION);
1018: exit(EXIT_SUCCESS);
1019: /* NOTREACHED */
1020: break;
1021: case DEBUG_OPT:
1022: debug_sort = true;
1023: break;
1024: case HELP_OPT:
1025: usage(0);
1026: /* NOTREACHED */
1027: break;
1028: default:
1029: usage(2);
1030: /* NOTREACHED */
1031: }
1032: }
1033: }
1034:
1035: argc -= optind;
1036: argv += optind;
1037:
1038: if (keys_num == 0) {
1039: keys_num = 1;
1040: keys = sort_realloc(keys, sizeof(struct key_specs));
1041: memset(&(keys[0]), 0, sizeof(struct key_specs));
1042: keys[0].c1 = 1;
1043: keys[0].pos1b = default_sort_mods->bflag;
1044: keys[0].pos2b = default_sort_mods->bflag;
1045: memcpy(&(keys[0].sm), default_sort_mods,
1046: sizeof(struct sort_mods));
1047: }
1048:
1049: for (i = 0; i < keys_num; i++) {
1050: struct key_specs *ks;
1051:
1052: ks = &(keys[i]);
1053:
1054: if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1055: !(ks->pos2b)) {
1056: ks->pos1b = sm->bflag;
1057: ks->pos2b = sm->bflag;
1058: memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1059: }
1060:
1061: ks->sm.func = get_sort_func(&(ks->sm));
1062: }
1063:
1064: if (argv_from_file0) {
1065: argc = argc_from_file0;
1066: argv = argv_from_file0;
1067: }
1068:
1069: if (debug_sort) {
1070: printf("Memory to be used for sorting: %llu\n",
1071: available_free_memory);
1072: printf("Using collate rules of %s locale\n",
1073: setlocale(LC_COLLATE, NULL));
1074: if (byte_sort)
1075: printf("Byte sort is used\n");
1076: if (print_symbols_on_debug) {
1077: printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1078: if (symbol_thousands_sep)
1079: printf("Thousands separator: <%lc>\n",
1080: symbol_thousands_sep);
1081: printf("Positive sign: <%lc>\n", symbol_positive_sign);
1082: printf("Negative sign: <%lc>\n", symbol_negative_sign);
1083: }
1084: }
1085:
1086: set_random_seed();
1.4 millert 1087:
1.44 millert 1088: /* Case when the outfile equals one of the input files: */
1.51 millert 1089: if (strcmp(outfile, "-") != 0) {
1090: struct stat sb;
1091: int fd, i;
1.44 millert 1092:
1093: for (i = 0; i < argc; ++i) {
1094: if (strcmp(argv[i], outfile) == 0) {
1.51 millert 1095: if (stat(outfile, &sb) == -1)
1096: err(2, "%s", outfile);
1097: if (access(outfile, W_OK) == -1)
1098: err(2, "%s", outfile);
1099: real_outfile = outfile;
1100: sort_asprintf(&outfile, "%s.XXXXXXXXXX",
1101: real_outfile);
1102: if ((fd = mkstemp(outfile)) == -1 ||
1103: fchmod(fd, sb.st_mode & ALLPERMS) == -1)
1104: err(2, "%s", outfile);
1105: close(fd);
1.44 millert 1106: tmp_file_atexit(outfile);
1.51 millert 1107: break;
1.44 millert 1108: }
1109: }
1110: }
1111:
1112: if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1113: struct file_list fl;
1114: struct sort_list list;
1115:
1116: sort_list_init(&list);
1117: file_list_init(&fl, true);
1118:
1119: if (argc < 1)
1120: procfile("-", &list, &fl);
1121: else {
1122: while (argc > 0) {
1123: procfile(*argv, &list, &fl);
1124: --argc;
1125: ++argv;
1126: }
1127: }
1128:
1129: if (fl.count < 1)
1130: sort_list_to_file(&list, outfile);
1131: else {
1132: if (list.count > 0) {
1133: char *flast = new_tmp_file_name();
1134:
1135: sort_list_to_file(&list, flast);
1136: file_list_add(&fl, flast, false);
1137: }
1138: merge_files(&fl, outfile);
1139: }
1140:
1141: file_list_clean(&fl);
1142:
1143: /*
1144: * We are about to exit the program, so we can ignore
1145: * the clean-up for speed
1146: *
1147: * sort_list_clean(&list);
1148: */
1149:
1150: } else if (sort_opts_vals.cflag) {
1151: result = (argc == 0) ? (check("-")) : (check(*argv));
1152: } else if (sort_opts_vals.mflag) {
1153: struct file_list fl;
1154:
1155: file_list_init(&fl, false);
1156: file_list_populate(&fl, argc, argv, true);
1157: merge_files(&fl, outfile);
1158: file_list_clean(&fl);
1159: }
1160:
1161: if (real_outfile) {
1162: if (rename(outfile, real_outfile) < 0)
1163: err(2, "%s", real_outfile);
1.51 millert 1164: sort_free(outfile);
1.44 millert 1165: }
1.4 millert 1166:
1.44 millert 1167: return result;
1.1 millert 1168: }