Annotation of src/usr.bin/sort/sort.c, Revision 1.69
1.69 ! millert 1: /* $OpenBSD: sort.c,v 1.68 2015/04/01 22:49:47 millert Exp $ */
1.1 millert 2:
3: /*-
1.44 millert 4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6: * All rights reserved.
1.1 millert 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
1.44 millert 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1.1 millert 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.44 millert 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1.1 millert 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
1.48 millert 30: #include <sys/resource.h>
1.44 millert 31: #include <sys/stat.h>
1.48 millert 32: #include <sys/sysctl.h>
1.44 millert 33: #include <sys/types.h>
1.1 millert 34:
1.44 millert 35: #include <err.h>
36: #include <errno.h>
37: #include <getopt.h>
38: #include <limits.h>
1.16 ericj 39: #include <locale.h>
1.44 millert 40: #include <md5.h>
41: #include <regex.h>
1.1 millert 42: #include <signal.h>
1.44 millert 43: #include <stdbool.h>
44: #include <stdio.h>
1.1 millert 45: #include <stdlib.h>
46: #include <string.h>
47: #include <unistd.h>
1.44 millert 48: #include <wchar.h>
49: #include <wctype.h>
50:
51: #include "coll.h"
52: #include "file.h"
53: #include "sort.h"
54:
55: #define OPTIONS "bCcdfgHhik:Mmno:RrS:st:T:uVz"
56:
57: static bool need_random;
58: static const char *random_source;
59:
60: MD5_CTX md5_ctx;
61:
62: struct sort_opts sort_opts_vals;
63:
64: bool debug_sort;
65: bool need_hint;
66:
67: static bool gnusort_numeric_compatibility;
68:
69: static struct sort_mods default_sort_mods_object;
70: struct sort_mods * const default_sort_mods = &default_sort_mods_object;
71:
72: static bool print_symbols_on_debug;
73:
74: /*
75: * Arguments from file (when file0-from option is used:
76: */
77: static size_t argc_from_file0 = (size_t)-1;
78: static char **argv_from_file0;
79:
/*
 * getopt_long() return codes for the long options that have no
 * single-character equivalent.  Numbering starts at CHAR_MAX + 1 so that
 * no code can be mistaken for a character from OPTIONS.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
97:
/*
 * Sort-type modifiers of which at most one may be in effect; consulted by
 * check_mutually_exclusive_flags().
 */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char
mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M', 'n', 'g', 'R', 'h', 'V'
};
100:
101: static const struct option long_options[] = {
102: { "batch-size", required_argument, NULL, BS_OPT },
103: { "buffer-size", required_argument, NULL, 'S' },
104: { "check", optional_argument, NULL, 'c' },
105: { "check=silent|quiet", optional_argument, NULL, 'C' },
106: { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
107: { "debug", no_argument, NULL, DEBUG_OPT },
108: { "dictionary-order", no_argument, NULL, 'd' },
109: { "field-separator", required_argument, NULL, 't' },
110: { "files0-from", required_argument, NULL, FF_OPT },
111: { "general-numeric-sort", no_argument, NULL, 'g' },
112: { "heapsort", no_argument, NULL, HEAPSORT_OPT },
113: { "help", no_argument, NULL, HELP_OPT },
114: { "human-numeric-sort", no_argument, NULL, 'h' },
115: { "ignore-leading-blanks", no_argument, NULL, 'b' },
116: { "ignore-case", no_argument, NULL, 'f' },
117: { "ignore-nonprinting", no_argument, NULL, 'i' },
118: { "key", required_argument, NULL, 'k' },
119: { "merge", no_argument, NULL, 'm' },
120: { "mergesort", no_argument, NULL, 'H' },
121: { "mmap", no_argument, NULL, MMAP_OPT },
122: { "month-sort", no_argument, NULL, 'M' },
123: { "numeric-sort", no_argument, NULL, 'n' },
124: { "output", required_argument, NULL, 'o' },
125: { "qsort", no_argument, NULL, QSORT_OPT },
126: { "radixsort", no_argument, NULL, RADIXSORT_OPT },
127: { "random-sort", no_argument, NULL, 'R' },
128: { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
129: { "reverse", no_argument, NULL, 'r' },
130: { "sort", required_argument, NULL, SORT_OPT },
131: { "stable", no_argument, NULL, 's' },
132: { "temporary-directory", required_argument, NULL, 'T' },
133: { "unique", no_argument, NULL, 'u' },
134: { "version", no_argument, NULL, VERSION_OPT },
135: { "version-sort", no_argument, NULL, 'V' },
136: { "zero-terminated", no_argument, NULL, 'z' },
137: { NULL, no_argument, NULL, 0 }
138: };
139:
140: /*
141: * Check where sort modifier is present
142: */
143: static bool
144: sort_modifier_empty(struct sort_mods *sm)
145: {
146: return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
147: sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag);
148: }
149:
150: /*
151: * Print out usage text.
152: */
153: static __dead void
154: usage(int exit_val)
155: {
156: fprintf(exit_val ? stderr : stdout,
1.46 jmc 157: "usage: %s [-bCcdfgHhiMmnRrsuVz] [-k field1[,field2]] [-o output] "
1.45 jmc 158: "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname());
1.44 millert 159: exit(exit_val);
160: }
1.4 millert 161:
1.1 millert 162: /*
1.44 millert 163: * Read input file names from a file (file0-from option).
1.1 millert 164: */
1.44 millert 165: static void
166: read_fns_from_file0(const char *fn)
167: {
1.47 millert 168: FILE *f;
169: char *line = NULL;
170: size_t linesize = 0;
171: ssize_t linelen;
172:
173: f = fopen(fn, "r");
174: if (f == NULL)
175: err(2, "%s", fn);
176:
177: while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
178: if (*line != '\0') {
179: if (argc_from_file0 == (size_t)-1)
180: argc_from_file0 = 0;
181: ++argc_from_file0;
182: argv_from_file0 = sort_reallocarray(argv_from_file0,
183: argc_from_file0, sizeof(char *));
184: argv_from_file0[argc_from_file0 - 1] = line;
185: } else {
186: free(line);
1.44 millert 187: }
1.47 millert 188: line = NULL;
189: linesize = 0;
1.44 millert 190: }
1.47 millert 191: if (ferror(f))
192: err(2, "%s: getdelim", fn);
193:
194: closefile(f, fn);
1.44 millert 195: }
1.4 millert 196:
/*
 * Work out how much memory the sort may use.
 *
 * Queries the hw.usermem64 sysctl, raises the RLIMIT_DATA soft limit to
 * the hard limit, and stores the smaller of the two figures in
 * free_memory.  Half of that becomes available_free_memory.
 */
static void
set_hw_params(void)
{
	int mib[] = { CTL_HW, HW_USERMEM64 };
	struct rlimit rl;
	long long user_memory;
	size_t len = sizeof(user_memory);

	/* Total user (non-kernel) memory; -1 means "unknown". */
	if (sysctl(mib, 2, &user_memory, &len, NULL, 0) == -1)
		user_memory = -1;

	/* Push the data-size soft limit up to the hard limit. */
	if (getrlimit(RLIMIT_DATA, &rl) != 0) {
		warn("Can't get resource limit for data size");
	} else {
		free_memory = (unsigned long long)rl.rlim_cur;
		rl.rlim_cur = rl.rlim_max;
		if (setrlimit(RLIMIT_DATA, &rl) != 0)
			warn("Can't set resource limit to max data size");
		else
			free_memory = (unsigned long long)rl.rlim_max;
	}

	/* Temp files are preferred over swap: never exceed user memory. */
	if (user_memory != -1 && free_memory > user_memory)
		free_memory = user_memory;

	available_free_memory = free_memory / 2;
}
231:
/*
 * Convert the first multibyte character of c to a wide character.
 *
 * The result is stored in *wc.  When mbtowc() reports an error or an
 * empty string (a return value below 1), *wc is set to def instead.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{
	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
1.12 millert 244:
1.44 millert 245: /*
246: * Set current locale symbols.
247: */
248: static void
249: set_locale(void)
1.1 millert 250: {
1.44 millert 251: struct lconv *lc;
252: const char *locale;
1.4 millert 253:
1.16 ericj 254: setlocale(LC_ALL, "");
255:
1.66 millert 256: /* Obtain LC_NUMERIC info */
1.44 millert 257: lc = localeconv();
258:
1.66 millert 259: /* Convert to wide char form */
260: conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
261: symbol_decimal_point);
262: conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
263: symbol_thousands_sep);
264: conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
265: symbol_positive_sign);
266: conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
267: symbol_negative_sign);
1.44 millert 268:
269: if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
270: gnusort_numeric_compatibility = true;
271:
272: locale = setlocale(LC_COLLATE, NULL);
273: if (locale != NULL) {
274: char *tmpl;
275: const char *byteclocale;
276:
277: tmpl = sort_strdup(locale);
278: byteclocale = setlocale(LC_COLLATE, "C");
279: if (byteclocale && strcmp(byteclocale, tmpl) == 0) {
280: byte_sort = true;
281: } else {
282: byteclocale = setlocale(LC_COLLATE, "POSIX");
283: if (byteclocale && strcmp(byteclocale, tmpl) == 0)
284: byte_sort = true;
285: else
286: setlocale(LC_COLLATE, tmpl);
287: }
288: sort_free(tmpl);
289: }
290: if (!byte_sort)
291: sort_mb_cur_max = MB_CUR_MAX;
292: }
293:
294: /*
295: * Set directory temporary files.
296: */
297: static void
298: set_tmpdir(void)
299: {
1.53 millert 300: if (!issetugid()) {
301: char *td;
1.44 millert 302:
1.53 millert 303: td = getenv("TMPDIR");
304: if (td != NULL)
305: tmpdir = sort_strdup(td);
306: }
1.44 millert 307: }
308:
309: /*
310: * Parse -S option.
311: */
312: static unsigned long long
313: parse_memory_buffer_value(const char *value)
314: {
1.66 millert 315: char *endptr;
316: unsigned long long membuf;
317:
318: membuf = strtoll(value, &endptr, 10);
319: if (endptr == value || (long long)membuf < 0 ||
320: (errno == ERANGE && membuf == LLONG_MAX))
1.68 millert 321: goto invalid;
1.66 millert 322:
323: switch (*endptr) {
324: case 'Y':
1.68 millert 325: if (membuf > ULLONG_MAX / 1024)
326: goto invalid;
1.66 millert 327: membuf *= 1024;
328: /* FALLTHROUGH */
329: case 'Z':
1.68 millert 330: if (membuf > ULLONG_MAX / 1024)
331: goto invalid;
1.66 millert 332: membuf *= 1024;
333: /* FALLTHROUGH */
334: case 'E':
1.68 millert 335: if (membuf > ULLONG_MAX / 1024)
336: goto invalid;
1.66 millert 337: membuf *= 1024;
338: /* FALLTHROUGH */
339: case 'P':
1.68 millert 340: if (membuf > ULLONG_MAX / 1024)
341: goto invalid;
1.66 millert 342: membuf *= 1024;
343: /* FALLTHROUGH */
344: case 'T':
1.68 millert 345: if (membuf > ULLONG_MAX / 1024)
346: goto invalid;
1.66 millert 347: membuf *= 1024;
348: /* FALLTHROUGH */
349: case 'G':
1.68 millert 350: if (membuf > ULLONG_MAX / 1024)
351: goto invalid;
1.66 millert 352: membuf *= 1024;
353: /* FALLTHROUGH */
354: case 'M':
1.68 millert 355: if (membuf > ULLONG_MAX / 1024)
356: goto invalid;
1.66 millert 357: membuf *= 1024;
358: /* FALLTHROUGH */
359: case '\0':
360: case 'K':
1.68 millert 361: if (membuf > ULLONG_MAX / 1024)
362: goto invalid;
1.66 millert 363: membuf *= 1024;
364: /* FALLTHROUGH */
365: case 'b':
366: break;
367: case '%':
368: membuf = (available_free_memory * membuf) /
369: 100;
370: break;
371: default:
372: warnc(EINVAL, "%s", optarg);
373: membuf = available_free_memory;
1.44 millert 374: }
1.66 millert 375: return membuf;
1.68 millert 376: invalid:
377: errx(2, "invalid memory buffer size: %s", value);
1.44 millert 378: }
379:
380: /*
381: * Signal handler that clears the temporary files.
382: */
383: static void
1.49 millert 384: sig_handler(int sig __unused)
1.44 millert 385: {
386: clear_tmp_files();
1.50 millert 387: _exit(2);
1.44 millert 388: }
389:
390: /*
391: * Set signal handler on panic signals.
392: */
393: static void
394: set_signal_handler(void)
395: {
396: struct sigaction sa;
1.69 ! millert 397: int i, signals[] = {SIGTERM, SIGHUP, SIGINT, SIGUSR1, SIGUSR2,
! 398: SIGPIPE, SIGXCPU, SIGXFSZ, 0};
1.44 millert 399:
400: memset(&sa, 0, sizeof(sa));
1.49 millert 401: sigfillset(&sa.sa_mask);
402: sa.sa_flags = SA_RESTART;
403: sa.sa_handler = sig_handler;
404:
405: for (i = 0; signals[i] != 0; i++) {
406: if (sigaction(signals[i], &sa, NULL) < 0) {
407: warn("sigaction(%d)", i);
408: continue;
409: }
1.44 millert 410: }
411: }
412:
/*
 * Report an unsupported feature and exit with status 2.
 *
 * what is the text the user supplied; it is echoed in the diagnostic.
 */
static void
unknown(const char *what)
{
	errx(2, "Unknown feature: %s", what);
}
421:
422: /*
423: * Check whether contradictory input options are used.
424: */
425: static void
426: check_mutually_exclusive_flags(char c, bool *mef_flags)
427: {
428: int i, fo_index, mec;
429: bool found_others, found_this;
430:
431: found_others = found_this =false;
432: fo_index = 0;
433:
434: for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
435: mec = mutually_exclusive_flags[i];
436:
437: if (mec != c) {
438: if (mef_flags[i]) {
1.65 millert 439: if (found_this) {
440: errx(2,
441: "%c:%c: mutually exclusive flags",
442: c, mec);
443: }
1.44 millert 444: found_others = true;
445: fo_index = i;
446: }
447: } else {
1.65 millert 448: if (found_others) {
449: errx(2,
450: "%c:%c: mutually exclusive flags",
451: c, mutually_exclusive_flags[fo_index]);
452: }
1.44 millert 453: mef_flags[i] = true;
454: found_this = true;
455: }
456: }
457: }
458:
459: /*
460: * Initialise sort opts data.
461: */
462: static void
463: set_sort_opts(void)
464: {
465: memset(&default_sort_mods_object, 0,
466: sizeof(default_sort_mods_object));
467: memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
468: default_sort_mods_object.func =
469: get_sort_func(&default_sort_mods_object);
470: }
471:
472: /*
473: * Set a sort modifier on a sort modifiers object.
474: */
475: static bool
476: set_sort_modifier(struct sort_mods *sm, int c)
477: {
1.66 millert 478: switch (c) {
479: case 'b':
480: sm->bflag = true;
481: break;
482: case 'd':
483: sm->dflag = true;
484: break;
485: case 'f':
486: sm->fflag = true;
487: break;
488: case 'g':
489: sm->gflag = true;
490: need_hint = true;
491: break;
492: case 'i':
493: sm->iflag = true;
494: break;
495: case 'R':
496: sm->Rflag = true;
497: need_random = true;
498: break;
499: case 'M':
500: initialise_months();
501: sm->Mflag = true;
502: need_hint = true;
503: break;
504: case 'n':
505: sm->nflag = true;
506: need_hint = true;
507: print_symbols_on_debug = true;
508: break;
509: case 'r':
510: sm->rflag = true;
511: break;
512: case 'V':
513: sm->Vflag = true;
514: break;
515: case 'h':
516: sm->hflag = true;
517: need_hint = true;
518: print_symbols_on_debug = true;
519: break;
520: default:
521: return false;
1.44 millert 522: }
1.66 millert 523: sort_opts_vals.complex_sort = true;
524: sm->func = get_sort_func(sm);
525:
1.44 millert 526: return true;
527: }
528:
529: /*
530: * Parse POS in -k option.
531: */
532: static int
533: parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
534: {
535: regmatch_t pmatch[4];
536: regex_t re;
537: char *c, *f;
538: const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
539: size_t len, nmatch;
540: int ret;
541:
542: ret = -1;
543: nmatch = 4;
544: c = f = NULL;
545:
546: if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
547: return -1;
548:
549: if (regexec(&re, s, nmatch, pmatch, 0) != 0)
550: goto end;
551:
552: if (pmatch[0].rm_eo <= pmatch[0].rm_so)
553: goto end;
554:
555: if (pmatch[1].rm_eo <= pmatch[1].rm_so)
556: goto end;
557:
558: len = pmatch[1].rm_eo - pmatch[1].rm_so;
559:
1.57 millert 560: f = sort_malloc(len + 1);
561: memcpy(f, s + pmatch[1].rm_so, len);
1.44 millert 562: f[len] = '\0';
563:
564: if (second) {
565: errno = 0;
1.58 millert 566: ks->f2 = (size_t)strtoul(f, NULL, 10);
1.44 millert 567: if (errno != 0)
1.58 millert 568: goto end;
1.44 millert 569: if (ks->f2 == 0) {
570: warn("0 field in key specs");
571: goto end;
572: }
573: } else {
574: errno = 0;
1.58 millert 575: ks->f1 = (size_t)strtoul(f, NULL, 10);
1.44 millert 576: if (errno != 0)
1.58 millert 577: goto end;
1.44 millert 578: if (ks->f1 == 0) {
579: warn("0 field in key specs");
580: goto end;
581: }
582: }
583:
584: if (pmatch[2].rm_eo > pmatch[2].rm_so) {
585: len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
586:
1.57 millert 587: c = sort_malloc(len + 1);
588: memcpy(c, s + pmatch[2].rm_so + 1, len);
1.44 millert 589: c[len] = '\0';
590:
591: if (second) {
592: errno = 0;
1.58 millert 593: ks->c2 = (size_t)strtoul(c, NULL, 10);
1.44 millert 594: if (errno != 0)
1.58 millert 595: goto end;
1.44 millert 596: } else {
597: errno = 0;
1.58 millert 598: ks->c1 = (size_t)strtoul(c, NULL, 10);
1.44 millert 599: if (errno != 0)
1.58 millert 600: goto end;
1.44 millert 601: if (ks->c1 == 0) {
602: warn("0 column in key specs");
603: goto end;
604: }
605: }
606: } else {
607: if (second)
608: ks->c2 = 0;
609: else
610: ks->c1 = 1;
611: }
612:
613: if (pmatch[3].rm_eo > pmatch[3].rm_so) {
614: regoff_t i = 0;
615:
616: for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
617: check_mutually_exclusive_flags(s[i], mef_flags);
618: if (s[i] == 'b') {
619: if (second)
620: ks->pos2b = true;
621: else
622: ks->pos1b = true;
623: } else if (!set_sort_modifier(&(ks->sm), s[i]))
624: goto end;
625: }
626: }
627:
628: ret = 0;
629:
630: end:
1.61 millert 631: sort_free(c);
632: sort_free(f);
1.44 millert 633: regfree(&re);
634:
635: return ret;
636: }
637:
638: /*
639: * Parse -k option value.
640: */
641: static int
642: parse_k(const char *s, struct key_specs *ks)
643: {
644: int ret = -1;
645: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
646: { false, false, false, false, false, false };
647:
1.66 millert 648: if (*s != '\0') {
1.44 millert 649: char *sptr;
650:
651: sptr = strchr(s, ',');
652: if (sptr) {
653: size_t size1;
654: char *pos1, *pos2;
655:
656: size1 = sptr - s;
657:
658: if (size1 < 1)
659: return -1;
660:
1.57 millert 661: pos1 = sort_malloc(size1 + 1);
662: memcpy(pos1, s, size1);
1.44 millert 663: pos1[size1] = '\0';
664:
665: ret = parse_pos(pos1, ks, mef_flags, false);
666:
667: sort_free(pos1);
668: if (ret < 0)
669: return ret;
670:
671: pos2 = sort_strdup(sptr + 1);
672: ret = parse_pos(pos2, ks, mef_flags, true);
673: sort_free(pos2);
674: } else
675: ret = parse_pos(s, ks, mef_flags, false);
1.1 millert 676: }
1.4 millert 677:
1.44 millert 678: return ret;
679: }
680:
/*
 * Parse one POS of the obsolete +POS1 -POS2 syntax:
 * FIELD[.COLUMN][LETTERS].
 *
 * s is the text after the leading '+' or '-'.  On success *nf receives
 * the field number, *nc the column number (0 when absent) and, when
 * letters are present, sopts receives them as a NUL-terminated string;
 * sopts is left untouched otherwise.  sopts_size is the capacity of
 * sopts.
 *
 * Returns 0 on success and -1 when s does not have the expected form
 * (*nf and *nc are then 0).  A number that strtoul() rejects, or a letter
 * run that does not fit in sopts, exits with status 2.
 *
 * The numbers are converted in place: the regular expression guarantees a
 * run of digits at each offset and strtoul() stops at the first non-digit,
 * so no temporary copies are needed.
 */
static int
parse_pos_obs(const char *s, size_t *nf, size_t *nc, char *sopts, size_t sopts_size)
{
	regex_t re;
	regmatch_t pmatch[4];
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	size_t len;
	int ret = -1;

	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return -1;

	if (regexec(&re, s, 4, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so ||
	    pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* Field number. */
	errno = 0;
	*nf = (size_t)strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0)
		errx(2, "Invalid key position");

	/* Optional column; the match includes the leading '.'. */
	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		errno = 0;
		*nc = (size_t)strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0)
			errx(2, "Invalid key position");
	}

	/* Optional modifier letters, copied out verbatim. */
	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = (size_t)(pmatch[3].rm_eo - pmatch[3].rm_so);

		if (len >= sopts_size)
			errx(2, "Invalid key position");
		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return ret;
}
754:
/*
 * "Translate" obsolete +POS1 [-POS2] syntax into new -kPOS1[,POS2] syntax.
 *
 * argv is scanned from index 1.  An argument "+F[.C][opts]" is replaced
 * by an equivalent "-k" argument with field and column incremented by
 * one.  When the next argument parses as "-F[.C][opts]" it is folded into
 * the same key and removed from argv, and *argc is decremented.
 * Arguments that do not parse as a position are left alone.
 *
 * Differences from a plain scan:
 *  - scanning stops at "--", so operands such as a file named "+1" are
 *    not rewritten;
 *  - the new argument is built with sort_asprintf(), the allocating
 *    formatter main() already uses for the output file name, so long
 *    modifier strings can no longer be truncated by a fixed buffer;
 *  - argv[*argc] is reset to NULL after an entry is removed.
 *
 * The replacement strings are heap-allocated and never freed.
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{
	int i;

	for (i = 1; i < *argc; i++) {
		const char *arg1 = argv[i];
		size_t c1, f1, c2, f2;
		char sopts1[128], sopts2[128];
		char *key;
		int j;

		/* Following arguments are treated as file names. */
		if (strcmp(arg1, "--") == 0)
			break;

		if (arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1,
		    sizeof(sopts1)) < 0)
			continue;

		/* Obsolete positions count from 0, -k counts from 1. */
		f1 += 1;
		c1 += 1;

		sopts2[0] = '\0';
		if (i + 1 < *argc && argv[i + 1][0] == '-' &&
		    parse_pos_obs(argv[i + 1] + 1, &f2, &c2, sopts2,
		    sizeof(sopts2)) >= 0) {
			if (c2 > 0)
				f2 += 1;
			sort_asprintf(&key, "-k%zu.%zu%s,%zu.%zu%s",
			    f1, c1, sopts1, f2, c2, sopts2);
			argv[i] = key;

			/* Close the gap left by the consumed -POS2. */
			for (j = i + 1; j + 1 < *argc; j++)
				argv[j] = argv[j + 1];
			*argc -= 1;
			argv[*argc] = NULL;
			continue;
		}

		sort_asprintf(&key, "-k%zu.%zu%s", f1, c1, sopts1);
		argv[i] = key;
	}
}
813:
814: /*
815: * Set random seed
816: */
817: static void
818: set_random_seed(void)
819: {
820: if (!need_random)
821: return;
1.4 millert 822:
1.44 millert 823: MD5Init(&md5_ctx);
824: if (random_source != NULL) {
825: unsigned char buf[BUFSIZ];
826: size_t nr;
827: FILE *fp;
828:
829: if ((fp = fopen(random_source, "r")) == NULL)
830: err(2, "%s", random_source);
831: while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0)
832: MD5Update(&md5_ctx, buf, nr);
833: if (ferror(fp))
834: err(2, "%s", random_source);
835: fclose(fp);
1.1 millert 836: } else {
1.44 millert 837: unsigned char rsd[1024];
838:
839: arc4random_buf(rsd, sizeof(rsd));
840: MD5Update(&md5_ctx, rsd, sizeof(rsd));
841: }
842: }
843:
844: /*
845: * Main function.
846: */
847: int
848: main(int argc, char *argv[])
849: {
850: char *outfile, *real_outfile;
851: int c, result;
852: size_t i;
853: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
854: { false, false, false, false, false, false };
855:
856: result = 0;
1.51 millert 857: outfile = "-";
1.44 millert 858: real_outfile = NULL;
859:
860: struct sort_mods *sm = &default_sort_mods_object;
861:
862: init_tmp_files();
863:
864: set_signal_handler();
865:
1.51 millert 866: atexit(clear_tmp_files);
867:
1.44 millert 868: set_hw_params();
869: set_locale();
870: set_tmpdir();
871: set_sort_opts();
872:
873: fix_obsolete_keys(&argc, argv);
874:
875: while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
876: != -1)) {
877:
878: check_mutually_exclusive_flags(c, mef_flags);
879:
880: if (!set_sort_modifier(sm, c)) {
881: switch (c) {
882: case 'c':
883: sort_opts_vals.cflag = true;
884: if (optarg) {
885: if (!strcmp(optarg, "diagnose-first"))
886: ;
887: else if (!strcmp(optarg, "silent") ||
888: !strcmp(optarg, "quiet"))
889: sort_opts_vals.csilentflag = true;
890: else if (*optarg)
891: unknown(optarg);
892: }
893: break;
894: case 'C':
895: sort_opts_vals.cflag = true;
896: sort_opts_vals.csilentflag = true;
897: break;
898: case 'k':
899: {
900: sort_opts_vals.complex_sort = true;
901: sort_opts_vals.kflag = true;
902:
903: keys_num++;
904: keys = sort_reallocarray(keys, keys_num,
905: sizeof(struct key_specs));
906: memset(&(keys[keys_num - 1]), 0,
907: sizeof(struct key_specs));
908:
1.66 millert 909: if (parse_k(optarg, &(keys[keys_num - 1])) < 0)
1.44 millert 910: errc(2, EINVAL, "-k %s", optarg);
911:
912: break;
913: }
914: case 'm':
915: sort_opts_vals.mflag = true;
916: break;
917: case 'o':
1.51 millert 918: outfile = optarg;
1.44 millert 919: break;
920: case 's':
921: sort_opts_vals.sflag = true;
922: break;
923: case 'S':
924: available_free_memory =
925: parse_memory_buffer_value(optarg);
926: break;
927: case 'T':
928: tmpdir = sort_strdup(optarg);
929: break;
930: case 't':
931: while (strlen(optarg) > 1) {
932: if (optarg[0] != '\\') {
933: errc(2, EINVAL, "%s", optarg);
934: }
935: optarg += 1;
936: if (*optarg == '0') {
937: *optarg = 0;
938: break;
939: }
940: }
941: sort_opts_vals.tflag = true;
942: sort_opts_vals.field_sep = btowc(optarg[0]);
943: if (sort_opts_vals.field_sep == WEOF) {
944: errno = EINVAL;
945: err(2, NULL);
946: }
947: if (!gnusort_numeric_compatibility) {
948: if (symbol_decimal_point == sort_opts_vals.field_sep)
949: symbol_decimal_point = WEOF;
950: if (symbol_thousands_sep == sort_opts_vals.field_sep)
951: symbol_thousands_sep = WEOF;
952: if (symbol_negative_sign == sort_opts_vals.field_sep)
953: symbol_negative_sign = WEOF;
954: if (symbol_positive_sign == sort_opts_vals.field_sep)
955: symbol_positive_sign = WEOF;
956: }
957: break;
958: case 'u':
959: sort_opts_vals.uflag = true;
960: /* stable sort for the correct unique val */
961: sort_opts_vals.sflag = true;
962: break;
963: case 'z':
964: sort_opts_vals.zflag = true;
965: break;
966: case SORT_OPT:
1.62 millert 967: if (!strcmp(optarg, "general-numeric"))
968: set_sort_modifier(sm, 'g');
969: else if (!strcmp(optarg, "human-numeric"))
970: set_sort_modifier(sm, 'h');
971: else if (!strcmp(optarg, "numeric"))
972: set_sort_modifier(sm, 'n');
973: else if (!strcmp(optarg, "month"))
974: set_sort_modifier(sm, 'M');
975: else if (!strcmp(optarg, "random"))
976: set_sort_modifier(sm, 'R');
977: else
978: unknown(optarg);
1.44 millert 979: break;
980: case QSORT_OPT:
981: sort_opts_vals.sort_method = SORT_QSORT;
982: break;
983: case 'H':
984: sort_opts_vals.sort_method = SORT_MERGESORT;
985: break;
986: case MMAP_OPT:
987: use_mmap = true;
988: break;
989: case HEAPSORT_OPT:
990: sort_opts_vals.sort_method = SORT_HEAPSORT;
991: break;
992: case RADIXSORT_OPT:
993: sort_opts_vals.sort_method = SORT_RADIXSORT;
994: break;
995: case RANDOMSOURCE_OPT:
996: random_source = strdup(optarg);
997: break;
998: case COMPRESSPROGRAM_OPT:
999: compress_program = strdup(optarg);
1000: break;
1001: case FF_OPT:
1002: read_fns_from_file0(optarg);
1003: break;
1004: case BS_OPT:
1005: {
1.54 millert 1006: const char *errstr;
1007:
1008: max_open_files = strtonum(optarg, 2,
1009: UINT_MAX - 1, &errstr) + 1;
1010: if (errstr != NULL)
1011: errx(2, "--batch-size argument is %s",
1012: errstr);
1013: break;
1.44 millert 1014: }
1015: case VERSION_OPT:
1016: printf("%s\n", VERSION);
1017: exit(EXIT_SUCCESS);
1018: /* NOTREACHED */
1019: break;
1020: case DEBUG_OPT:
1021: debug_sort = true;
1022: break;
1023: case HELP_OPT:
1024: usage(0);
1025: /* NOTREACHED */
1026: break;
1027: default:
1028: usage(2);
1029: /* NOTREACHED */
1030: }
1031: }
1032: }
1033:
1034: argc -= optind;
1035: argv += optind;
1036:
1037: if (keys_num == 0) {
1038: keys_num = 1;
1.67 millert 1039: keys = sort_reallocarray(keys, 1, sizeof(struct key_specs));
1.44 millert 1040: memset(&(keys[0]), 0, sizeof(struct key_specs));
1041: keys[0].c1 = 1;
1042: keys[0].pos1b = default_sort_mods->bflag;
1043: keys[0].pos2b = default_sort_mods->bflag;
1044: memcpy(&(keys[0].sm), default_sort_mods,
1045: sizeof(struct sort_mods));
1046: }
1047:
1048: for (i = 0; i < keys_num; i++) {
1049: struct key_specs *ks;
1050:
1051: ks = &(keys[i]);
1052:
1053: if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1054: !(ks->pos2b)) {
1055: ks->pos1b = sm->bflag;
1056: ks->pos2b = sm->bflag;
1057: memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1058: }
1059:
1060: ks->sm.func = get_sort_func(&(ks->sm));
1061: }
1062:
1063: if (argv_from_file0) {
1064: argc = argc_from_file0;
1065: argv = argv_from_file0;
1066: }
1067:
1068: if (debug_sort) {
1069: printf("Memory to be used for sorting: %llu\n",
1070: available_free_memory);
1071: printf("Using collate rules of %s locale\n",
1072: setlocale(LC_COLLATE, NULL));
1073: if (byte_sort)
1074: printf("Byte sort is used\n");
1075: if (print_symbols_on_debug) {
1076: printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1077: if (symbol_thousands_sep)
1078: printf("Thousands separator: <%lc>\n",
1079: symbol_thousands_sep);
1080: printf("Positive sign: <%lc>\n", symbol_positive_sign);
1081: printf("Negative sign: <%lc>\n", symbol_negative_sign);
1082: }
1083: }
1084:
1085: set_random_seed();
1.4 millert 1086:
1.44 millert 1087: /* Case when the outfile equals one of the input files: */
1.51 millert 1088: if (strcmp(outfile, "-") != 0) {
1089: struct stat sb;
1090: int fd, i;
1.44 millert 1091:
1092: for (i = 0; i < argc; ++i) {
1093: if (strcmp(argv[i], outfile) == 0) {
1.51 millert 1094: if (stat(outfile, &sb) == -1)
1095: err(2, "%s", outfile);
1096: if (access(outfile, W_OK) == -1)
1097: err(2, "%s", outfile);
1098: real_outfile = outfile;
1099: sort_asprintf(&outfile, "%s.XXXXXXXXXX",
1100: real_outfile);
1101: if ((fd = mkstemp(outfile)) == -1 ||
1102: fchmod(fd, sb.st_mode & ALLPERMS) == -1)
1103: err(2, "%s", outfile);
1104: close(fd);
1.44 millert 1105: tmp_file_atexit(outfile);
1.51 millert 1106: break;
1.44 millert 1107: }
1108: }
1109: }
1110:
1111: if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1112: struct file_list fl;
1113: struct sort_list list;
1114:
1115: sort_list_init(&list);
1116: file_list_init(&fl, true);
1117:
1118: if (argc < 1)
1119: procfile("-", &list, &fl);
1120: else {
1121: while (argc > 0) {
1122: procfile(*argv, &list, &fl);
1123: --argc;
1124: ++argv;
1125: }
1126: }
1127:
1128: if (fl.count < 1)
1129: sort_list_to_file(&list, outfile);
1130: else {
1131: if (list.count > 0) {
1132: char *flast = new_tmp_file_name();
1133:
1134: sort_list_to_file(&list, flast);
1135: file_list_add(&fl, flast, false);
1136: }
1137: merge_files(&fl, outfile);
1138: }
1139:
1140: file_list_clean(&fl);
1141:
1142: /*
1143: * We are about to exit the program, so we can ignore
1144: * the clean-up for speed
1145: *
1146: * sort_list_clean(&list);
1147: */
1148:
1149: } else if (sort_opts_vals.cflag) {
1150: result = (argc == 0) ? (check("-")) : (check(*argv));
1151: } else if (sort_opts_vals.mflag) {
1152: struct file_list fl;
1153:
1154: file_list_init(&fl, false);
1155: file_list_populate(&fl, argc, argv, true);
1156: merge_files(&fl, outfile);
1157: file_list_clean(&fl);
1158: }
1159:
1160: if (real_outfile) {
1161: if (rename(outfile, real_outfile) < 0)
1162: err(2, "%s", real_outfile);
1.51 millert 1163: sort_free(outfile);
1.44 millert 1164: }
1.4 millert 1165:
1.44 millert 1166: return result;
1.1 millert 1167: }