Annotation of src/usr.bin/sort/sort.c, Revision 1.44
1.44 ! millert 1: /* $OpenBSD$ */
1.1 millert 2:
3: /*-
1.44 ! millert 4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
! 5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
! 6: * All rights reserved.
1.1 millert 7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
1.44 ! millert 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1.1 millert 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1.44 ! millert 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1.1 millert 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
1.44 ! millert 30: #include <sys/stat.h>
! 31: #include <sys/types.h>
1.1 millert 32:
1.44 ! millert 33: #include <err.h>
! 34: #include <errno.h>
! 35: #include <getopt.h>
! 36: #include <limits.h>
1.16 ericj 37: #include <locale.h>
1.44 ! millert 38: #include <md5.h>
! 39: #include <regex.h>
1.1 millert 40: #include <signal.h>
1.44 ! millert 41: #include <stdbool.h>
! 42: #include <stdio.h>
1.1 millert 43: #include <stdlib.h>
44: #include <string.h>
45: #include <unistd.h>
1.44 ! millert 46: #include <wchar.h>
! 47: #include <wctype.h>
! 48:
! 49: #include "coll.h"
! 50: #include "file.h"
! 51: #include "sort.h"
! 52:
! 53: #define OPTIONS "bCcdfgHhik:Mmno:RrS:st:T:uVz"
! 54:
! 55: static bool need_random;
! 56: static const char *random_source;
! 57:
! 58: MD5_CTX md5_ctx;
! 59:
! 60: struct sort_opts sort_opts_vals;
! 61:
! 62: bool debug_sort;
! 63: bool need_hint;
! 64:
! 65: static bool gnusort_numeric_compatibility;
! 66:
! 67: static struct sort_mods default_sort_mods_object;
! 68: struct sort_mods * const default_sort_mods = &default_sort_mods_object;
! 69:
! 70: static bool print_symbols_on_debug;
! 71:
! 72: /*
! 73: * Arguments from file (when file0-from option is used:
! 74: */
! 75: static size_t argc_from_file0 = (size_t)-1;
! 76: static char **argv_from_file0;
! 77:
! 78: /*
! 79: * Placeholder symbols for options which have no single-character equivalent
! 80: */
! 81: enum {
! 82: SORT_OPT = CHAR_MAX + 1,
! 83: HELP_OPT,
! 84: FF_OPT,
! 85: BS_OPT,
! 86: VERSION_OPT,
! 87: DEBUG_OPT,
! 88: RANDOMSOURCE_OPT,
! 89: COMPRESSPROGRAM_OPT,
! 90: QSORT_OPT,
! 91: HEAPSORT_OPT,
! 92: RADIXSORT_OPT,
! 93: MMAP_OPT
! 94: };
! 95:
! 96: #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
! 97: static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
! 98:
! 99: static const struct option long_options[] = {
! 100: { "batch-size", required_argument, NULL, BS_OPT },
! 101: { "buffer-size", required_argument, NULL, 'S' },
! 102: { "check", optional_argument, NULL, 'c' },
! 103: { "check=silent|quiet", optional_argument, NULL, 'C' },
! 104: { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
! 105: { "debug", no_argument, NULL, DEBUG_OPT },
! 106: { "dictionary-order", no_argument, NULL, 'd' },
! 107: { "field-separator", required_argument, NULL, 't' },
! 108: { "files0-from", required_argument, NULL, FF_OPT },
! 109: { "general-numeric-sort", no_argument, NULL, 'g' },
! 110: { "heapsort", no_argument, NULL, HEAPSORT_OPT },
! 111: { "help", no_argument, NULL, HELP_OPT },
! 112: { "human-numeric-sort", no_argument, NULL, 'h' },
! 113: { "ignore-leading-blanks", no_argument, NULL, 'b' },
! 114: { "ignore-case", no_argument, NULL, 'f' },
! 115: { "ignore-nonprinting", no_argument, NULL, 'i' },
! 116: { "key", required_argument, NULL, 'k' },
! 117: { "merge", no_argument, NULL, 'm' },
! 118: { "mergesort", no_argument, NULL, 'H' },
! 119: { "mmap", no_argument, NULL, MMAP_OPT },
! 120: { "month-sort", no_argument, NULL, 'M' },
! 121: { "numeric-sort", no_argument, NULL, 'n' },
! 122: { "output", required_argument, NULL, 'o' },
! 123: { "qsort", no_argument, NULL, QSORT_OPT },
! 124: { "radixsort", no_argument, NULL, RADIXSORT_OPT },
! 125: { "random-sort", no_argument, NULL, 'R' },
! 126: { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
! 127: { "reverse", no_argument, NULL, 'r' },
! 128: { "sort", required_argument, NULL, SORT_OPT },
! 129: { "stable", no_argument, NULL, 's' },
! 130: { "temporary-directory", required_argument, NULL, 'T' },
! 131: { "unique", no_argument, NULL, 'u' },
! 132: { "version", no_argument, NULL, VERSION_OPT },
! 133: { "version-sort", no_argument, NULL, 'V' },
! 134: { "zero-terminated", no_argument, NULL, 'z' },
! 135: { NULL, no_argument, NULL, 0 }
! 136: };
! 137:
! 138: /*
! 139: * Check where sort modifier is present
! 140: */
! 141: static bool
! 142: sort_modifier_empty(struct sort_mods *sm)
! 143: {
! 144:
! 145: if (sm == NULL)
! 146: return true;
! 147: return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
! 148: sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag);
! 149: }
! 150:
! 151: /*
! 152: * Print out usage text.
! 153: */
! 154: static __dead void
! 155: usage(int exit_val)
! 156: {
1.1 millert 157:
1.44 ! millert 158: fprintf(exit_val ? stderr : stdout,
! 159: "usage: %s [-bcCdfghiRMmnrsuVz] [-k field1[,field2]] [-o output] "
! 160: "[-S memsize]\n\t[-T dir] [-t char] [file ...]\n", getprogname());
! 161: exit(exit_val);
! 162: }
1.4 millert 163:
1.1 millert 164: /*
1.44 ! millert 165: * Read input file names from a file (file0-from option).
1.1 millert 166: */
1.44 ! millert 167: static void
! 168: read_fns_from_file0(const char *fn)
! 169: {
! 170: if (fn) {
! 171: struct file0_reader f0r;
! 172: FILE *f;
! 173:
! 174: f = fopen(fn, "r");
! 175: if (f == NULL)
! 176: err(2, "%s", fn);
! 177:
! 178: memset(&f0r, 0, sizeof(f0r));
! 179: f0r.f = f;
! 180:
! 181: while (!feof(f)) {
! 182: char *line = read_file0_line(&f0r);
! 183:
! 184: if (line && *line) {
! 185: if (argc_from_file0 == (size_t)-1)
! 186: argc_from_file0 = 0;
! 187: ++argc_from_file0;
! 188: argv_from_file0 = sort_reallocarray(argv_from_file0,
! 189: argc_from_file0, sizeof(char *));
! 190: argv_from_file0[argc_from_file0 - 1] =
! 191: sort_strdup(line);
! 192: }
! 193: }
! 194: closefile(f, fn);
! 195: }
! 196: }
1.4 millert 197:
1.1 millert 198: /*
1.44 ! millert 199: * Check how much RAM is available for the sort.
1.1 millert 200: */
1.44 ! millert 201: static void
! 202: set_hw_params(void)
! 203: {
! 204: long pages, psize;
1.1 millert 205:
1.44 ! millert 206: pages = sysconf(_SC_PHYS_PAGES);
! 207: if (pages < 1) {
! 208: warn("sysconf pages");
! 209: pages = 1;
! 210: }
! 211: psize = sysconf(_SC_PAGESIZE);
! 212: if (psize < 1) {
! 213: warn("sysconf psize");
! 214: psize = 4096;
! 215: }
! 216:
! 217: free_memory = (unsigned long long) pages * (unsigned long long) psize;
! 218: available_free_memory = free_memory / 2;
! 219:
! 220: if (available_free_memory < 1024)
! 221: available_free_memory = 1024;
! 222: }
! 223:
/*
 * Convert the first multibyte character of c into *wc.
 * When the conversion fails or c is the empty string, *wc is set to def.
 * Nothing is done if either wc or c is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{
	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
1.12 millert 239:
1.44 ! millert 240: /*
! 241: * Set current locale symbols.
! 242: */
! 243: static void
! 244: set_locale(void)
1.1 millert 245: {
1.44 ! millert 246: struct lconv *lc;
! 247: const char *locale;
1.4 millert 248:
1.16 ericj 249: setlocale(LC_ALL, "");
250:
1.44 ! millert 251: lc = localeconv();
! 252:
! 253: if (lc) {
! 254: /* obtain LC_NUMERIC info */
! 255: /* Convert to wide char form */
! 256: conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
! 257: symbol_decimal_point);
! 258: conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
! 259: symbol_thousands_sep);
! 260: conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
! 261: symbol_positive_sign);
! 262: conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
! 263: symbol_negative_sign);
! 264: }
! 265:
! 266: if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
! 267: gnusort_numeric_compatibility = true;
! 268:
! 269: locale = setlocale(LC_COLLATE, NULL);
! 270: if (locale != NULL) {
! 271: char *tmpl;
! 272: const char *byteclocale;
! 273:
! 274: tmpl = sort_strdup(locale);
! 275: byteclocale = setlocale(LC_COLLATE, "C");
! 276: if (byteclocale && strcmp(byteclocale, tmpl) == 0) {
! 277: byte_sort = true;
! 278: } else {
! 279: byteclocale = setlocale(LC_COLLATE, "POSIX");
! 280: if (byteclocale && strcmp(byteclocale, tmpl) == 0)
! 281: byte_sort = true;
! 282: else
! 283: setlocale(LC_COLLATE, tmpl);
! 284: }
! 285: sort_free(tmpl);
! 286: }
! 287: if (!byte_sort)
! 288: sort_mb_cur_max = MB_CUR_MAX;
! 289: }
! 290:
! 291: /*
! 292: * Set directory temporary files.
! 293: */
! 294: static void
! 295: set_tmpdir(void)
! 296: {
! 297: char *td;
! 298:
! 299: td = getenv("TMPDIR");
! 300: if (td != NULL)
! 301: tmpdir = sort_strdup(td);
! 302: }
! 303:
! 304: /*
! 305: * Parse -S option.
! 306: */
! 307: static unsigned long long
! 308: parse_memory_buffer_value(const char *value)
! 309: {
! 310:
! 311: if (value == NULL)
! 312: return available_free_memory;
! 313: else {
! 314: char *endptr;
! 315: unsigned long long membuf;
! 316:
! 317: endptr = NULL;
! 318: errno = 0;
! 319: membuf = strtoll(value, &endptr, 10);
! 320:
! 321: if (errno != 0) {
! 322: warn("Wrong memory buffer specification");
! 323: membuf = available_free_memory;
! 324: } else {
! 325: switch (*endptr){
! 326: case 'Y':
! 327: membuf *= 1024;
! 328: /* FALLTHROUGH */
! 329: case 'Z':
! 330: membuf *= 1024;
! 331: /* FALLTHROUGH */
! 332: case 'E':
! 333: membuf *= 1024;
! 334: /* FALLTHROUGH */
! 335: case 'P':
! 336: membuf *= 1024;
! 337: /* FALLTHROUGH */
! 338: case 'T':
! 339: membuf *= 1024;
! 340: /* FALLTHROUGH */
! 341: case 'G':
! 342: membuf *= 1024;
! 343: /* FALLTHROUGH */
! 344: case 'M':
! 345: membuf *= 1024;
! 346: /* FALLTHROUGH */
! 347: case '\0':
! 348: case 'K':
! 349: membuf *= 1024;
! 350: /* FALLTHROUGH */
! 351: case 'b':
! 352: break;
! 353: case '%':
! 354: membuf = (available_free_memory * membuf) /
! 355: 100;
! 356: break;
! 357: default:
! 358: warnc(EINVAL, "%s", optarg);
! 359: membuf = available_free_memory;
! 360: }
! 361: }
! 362: return membuf;
! 363: }
! 364: }
! 365:
! 366: /*
! 367: * Signal handler that clears the temporary files.
! 368: */
! 369: static void
! 370: sig_handler(int sig __unused, siginfo_t *siginfo __unused,
! 371: void *context __unused)
! 372: {
! 373:
! 374: clear_tmp_files();
! 375: exit(2);
! 376: }
! 377:
! 378: /*
! 379: * Set signal handler on panic signals.
! 380: */
! 381: static void
! 382: set_signal_handler(void)
! 383: {
! 384: struct sigaction sa;
! 385:
! 386: memset(&sa, 0, sizeof(sa));
! 387: sa.sa_sigaction = &sig_handler;
! 388: sa.sa_flags = SA_SIGINFO;
! 389:
! 390: if (sigaction(SIGTERM, &sa, NULL) < 0) {
! 391: warn("sigaction(SIGTERM)");
! 392: return;
! 393: }
! 394: if (sigaction(SIGHUP, &sa, NULL) < 0) {
! 395: warn("sigaction(SIGHUP)");
! 396: return;
! 397: }
! 398: if (sigaction(SIGINT, &sa, NULL) < 0) {
! 399: warn("sigaction(SIGINT)");
! 400: return;
! 401: }
! 402: if (sigaction(SIGQUIT, &sa, NULL) < 0) {
! 403: warn("sigaction(SIGQUIT)");
! 404: return;
! 405: }
! 406: if (sigaction(SIGABRT, &sa, NULL) < 0) {
! 407: warn("sigaction(SIGABRT)");
! 408: return;
! 409: }
! 410: if (sigaction(SIGBUS, &sa, NULL) < 0) {
! 411: warn("sigaction(SIGBUS)");
! 412: return;
! 413: }
! 414: if (sigaction(SIGSEGV, &sa, NULL) < 0) {
! 415: warn("sigaction(SIGSEGV)");
! 416: return;
! 417: }
! 418: if (sigaction(SIGUSR1, &sa, NULL) < 0) {
! 419: warn("sigaction(SIGUSR1)");
! 420: return;
! 421: }
! 422: if (sigaction(SIGUSR2, &sa, NULL) < 0) {
! 423: warn("sigaction(SIGUSR2)");
! 424: return;
! 425: }
! 426: }
! 427:
/*
 * Report an unrecognised option argument and terminate with status 2.
 * "what" is the offending text, echoed in the message.
 */
static void
unknown(const char *what)
{
	errx(2, "Unknown feature: %s", what);
}
! 437:
! 438: /*
! 439: * Check whether contradictory input options are used.
! 440: */
! 441: static void
! 442: check_mutually_exclusive_flags(char c, bool *mef_flags)
! 443: {
! 444: int i, fo_index, mec;
! 445: bool found_others, found_this;
! 446:
! 447: found_others = found_this =false;
! 448: fo_index = 0;
! 449:
! 450: for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
! 451: mec = mutually_exclusive_flags[i];
! 452:
! 453: if (mec != c) {
! 454: if (mef_flags[i]) {
! 455: if (found_this)
! 456: errx(1, "%c:%c: mutually exclusive flags", c, mec);
! 457: found_others = true;
! 458: fo_index = i;
! 459: }
! 460: } else {
! 461: if (found_others)
! 462: errx(1, "%c:%c: mutually exclusive flags", c, mutually_exclusive_flags[fo_index]);
! 463: mef_flags[i] = true;
! 464: found_this = true;
! 465: }
! 466: }
! 467: }
! 468:
! 469: /*
! 470: * Initialise sort opts data.
! 471: */
! 472: static void
! 473: set_sort_opts(void)
! 474: {
! 475:
! 476: memset(&default_sort_mods_object, 0,
! 477: sizeof(default_sort_mods_object));
! 478: memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
! 479: default_sort_mods_object.func =
! 480: get_sort_func(&default_sort_mods_object);
! 481: }
! 482:
! 483: /*
! 484: * Set a sort modifier on a sort modifiers object.
! 485: */
! 486: static bool
! 487: set_sort_modifier(struct sort_mods *sm, int c)
! 488: {
! 489:
! 490: if (sm) {
! 491: switch (c){
! 492: case 'b':
! 493: sm->bflag = true;
1.1 millert 494: break;
495: case 'd':
1.44 ! millert 496: sm->dflag = true;
! 497: break;
1.4 millert 498: case 'f':
1.44 ! millert 499: sm->fflag = true;
! 500: break;
! 501: case 'g':
! 502: sm->gflag = true;
! 503: need_hint = true;
! 504: break;
1.1 millert 505: case 'i':
1.44 ! millert 506: sm->iflag = true;
1.1 millert 507: break;
508: case 'R':
1.44 ! millert 509: sm->Rflag = true;
! 510: need_random = true;
1.1 millert 511: break;
1.44 ! millert 512: case 'M':
! 513: initialise_months();
! 514: sm->Mflag = true;
! 515: need_hint = true;
1.1 millert 516: break;
1.44 ! millert 517: case 'n':
! 518: sm->nflag = true;
! 519: need_hint = true;
! 520: print_symbols_on_debug = true;
1.1 millert 521: break;
1.44 ! millert 522: case 'r':
! 523: sm->rflag = true;
1.1 millert 524: break;
1.44 ! millert 525: case 'V':
! 526: sm->Vflag = true;
1.1 millert 527: break;
1.44 ! millert 528: case 'h':
! 529: sm->hflag = true;
! 530: need_hint = true;
! 531: print_symbols_on_debug = true;
1.1 millert 532: break;
1.8 deraadt 533: default:
1.44 ! millert 534: return false;
1.1 millert 535: }
1.44 ! millert 536: sort_opts_vals.complex_sort = true;
! 537: sm->func = get_sort_func(sm);
! 538: }
! 539: return true;
! 540: }
! 541:
! 542: /*
! 543: * Parse POS in -k option.
! 544: */
! 545: static int
! 546: parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
! 547: {
! 548: regmatch_t pmatch[4];
! 549: regex_t re;
! 550: char *c, *f;
! 551: const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
! 552: size_t len, nmatch;
! 553: int ret;
! 554:
! 555: ret = -1;
! 556: nmatch = 4;
! 557: c = f = NULL;
! 558:
! 559: if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
! 560: return -1;
! 561:
! 562: if (regexec(&re, s, nmatch, pmatch, 0) != 0)
! 563: goto end;
! 564:
! 565: if (pmatch[0].rm_eo <= pmatch[0].rm_so)
! 566: goto end;
! 567:
! 568: if (pmatch[1].rm_eo <= pmatch[1].rm_so)
! 569: goto end;
! 570:
! 571: len = pmatch[1].rm_eo - pmatch[1].rm_so;
! 572: f = sort_malloc((len + 1) * sizeof(char));
! 573:
! 574: strncpy(f, s + pmatch[1].rm_so, len);
! 575: f[len] = '\0';
! 576:
! 577: if (second) {
! 578: errno = 0;
! 579: ks->f2 = (size_t) strtoul(f, NULL, 10);
! 580: if (errno != 0)
! 581: err(2, "-k");
! 582: if (ks->f2 == 0) {
! 583: warn("0 field in key specs");
! 584: goto end;
! 585: }
! 586: } else {
! 587: errno = 0;
! 588: ks->f1 = (size_t) strtoul(f, NULL, 10);
! 589: if (errno != 0)
! 590: err(2, "-k");
! 591: if (ks->f1 == 0) {
! 592: warn("0 field in key specs");
! 593: goto end;
! 594: }
! 595: }
! 596:
! 597: if (pmatch[2].rm_eo > pmatch[2].rm_so) {
! 598: len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
! 599: c = sort_malloc((len + 1) * sizeof(char));
! 600:
! 601: strncpy(c, s + pmatch[2].rm_so + 1, len);
! 602: c[len] = '\0';
! 603:
! 604: if (second) {
! 605: errno = 0;
! 606: ks->c2 = (size_t) strtoul(c, NULL, 10);
! 607: if (errno != 0)
! 608: err(2, "-k");
! 609: } else {
! 610: errno = 0;
! 611: ks->c1 = (size_t) strtoul(c, NULL, 10);
! 612: if (errno != 0)
! 613: err(2, "-k");
! 614: if (ks->c1 == 0) {
! 615: warn("0 column in key specs");
! 616: goto end;
! 617: }
! 618: }
! 619: } else {
! 620: if (second)
! 621: ks->c2 = 0;
! 622: else
! 623: ks->c1 = 1;
! 624: }
! 625:
! 626: if (pmatch[3].rm_eo > pmatch[3].rm_so) {
! 627: regoff_t i = 0;
! 628:
! 629: for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
! 630: check_mutually_exclusive_flags(s[i], mef_flags);
! 631: if (s[i] == 'b') {
! 632: if (second)
! 633: ks->pos2b = true;
! 634: else
! 635: ks->pos1b = true;
! 636: } else if (!set_sort_modifier(&(ks->sm), s[i]))
! 637: goto end;
! 638: }
! 639: }
! 640:
! 641: ret = 0;
! 642:
! 643: end:
! 644:
! 645: if (c)
! 646: sort_free(c);
! 647: if (f)
! 648: sort_free(f);
! 649: regfree(&re);
! 650:
! 651: return ret;
! 652: }
! 653:
! 654: /*
! 655: * Parse -k option value.
! 656: */
! 657: static int
! 658: parse_k(const char *s, struct key_specs *ks)
! 659: {
! 660: int ret = -1;
! 661: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
! 662: { false, false, false, false, false, false };
! 663:
! 664: if (s && *s) {
! 665: char *sptr;
! 666:
! 667: sptr = strchr(s, ',');
! 668: if (sptr) {
! 669: size_t size1;
! 670: char *pos1, *pos2;
! 671:
! 672: size1 = sptr - s;
! 673:
! 674: if (size1 < 1)
! 675: return -1;
! 676: pos1 = sort_malloc((size1 + 1) * sizeof(char));
! 677:
! 678: strncpy(pos1, s, size1);
! 679: pos1[size1] = '\0';
! 680:
! 681: ret = parse_pos(pos1, ks, mef_flags, false);
! 682:
! 683: sort_free(pos1);
! 684: if (ret < 0)
! 685: return ret;
! 686:
! 687: pos2 = sort_strdup(sptr + 1);
! 688: ret = parse_pos(pos2, ks, mef_flags, true);
! 689: sort_free(pos2);
! 690: } else
! 691: ret = parse_pos(s, ks, mef_flags, false);
1.1 millert 692: }
1.4 millert 693:
1.44 ! millert 694: return ret;
! 695: }
! 696:
/*
 * Parse POS in +POS -POS option.
 *
 * s has the form FIELD[.COLUMN][LETTERS].  The field and column numbers
 * are stored in *nf and *nc (both are reset to 0 first) and the trailing
 * letters, if any, are copied into sopts, a buffer of sopts_size bytes.
 *
 * Returns 0 on success and -1 when s does not have the expected form or
 * when the letters do not fit into sopts.  A number that is out of range
 * is fatal (exit status 2); the limit leaves room for the caller to add
 * one to the value.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char *sopts, size_t sopts_size)
{
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	int ret;
	size_t len, nmatch;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return -1;

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc((len + 1) * sizeof(char));
	memcpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	errno = 0;
	val = strtoul(f, NULL, 10);
	if (errno != 0 || val > (unsigned long)(INT_MAX - 1))
		errx(2, "Invalid key position");
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* The match includes the leading '.'. */
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc((len + 1) * sizeof(char));
		memcpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		val = strtoul(c, NULL, 10);
		if (errno != 0 || val > (unsigned long)(INT_MAX - 1))
			errx(2, "Invalid key position");
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;
		/* Never write past the caller's buffer. */
		if (len >= sopts_size)
			goto end;
		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (c)
		sort_free(c);
	if (f)
		sort_free(f);
	regfree(&re);

	return ret;
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax
 *
 * Every argument of the form +POS1 is replaced by a freshly allocated
 * "-k..." string with field and column numbers increased by one.  When
 * the next argument has the form -POS2 it is merged into the same key
 * and removed from argv, and *argc is decreased accordingly; the POS2
 * field number is increased only when a non-zero column was given.
 * Arguments that do not parse as a position are left alone.
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{
	/* Room for two modifier strings, four numbers and punctuation. */
	char sopt[2 * 128 + 64];
	int i;

	for (i = 1; i < *argc; i++) {
		char *arg1 = argv[i];
		char sopts1[128];
		int c1, f1;

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1,
		    sizeof(sopts1)) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				char sopts2[128];
				int c2, f2;

				sopts2[0] = '\0';
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1, &f2, &c2, sopts2,
				    sizeof(sopts2)) >= 0) {
					int j;

					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2,
					    c2, sopts2);
					argv[i] = sort_strdup(sopt);
					/*
					 * Close the gap left by -POS2.  The
					 * loop also moves argv[*argc] so the
					 * vector stays NULL-terminated; this
					 * relies on argv being main()'s
					 * argument vector.
					 */
					for (j = i + 1; j < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					continue;
				}
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
! 833:
! 834: /*
! 835: * Set random seed
! 836: */
! 837: static void
! 838: set_random_seed(void)
! 839: {
! 840: if (!need_random)
! 841: return;
1.4 millert 842:
1.44 ! millert 843: MD5Init(&md5_ctx);
! 844: if (random_source != NULL) {
! 845: unsigned char buf[BUFSIZ];
! 846: size_t nr;
! 847: FILE *fp;
! 848:
! 849: if ((fp = fopen(random_source, "r")) == NULL)
! 850: err(2, "%s", random_source);
! 851: while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0)
! 852: MD5Update(&md5_ctx, buf, nr);
! 853: if (ferror(fp))
! 854: err(2, "%s", random_source);
! 855: fclose(fp);
1.1 millert 856: } else {
1.44 ! millert 857: unsigned char rsd[1024];
! 858:
! 859: arc4random_buf(rsd, sizeof(rsd));
! 860: MD5Update(&md5_ctx, rsd, sizeof(rsd));
! 861: }
! 862: }
! 863:
! 864: /*
! 865: * Main function.
! 866: */
! 867: int
! 868: main(int argc, char *argv[])
! 869: {
! 870: char *outfile, *real_outfile;
! 871: int c, result;
! 872: size_t i;
! 873: bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
! 874: { false, false, false, false, false, false };
! 875:
! 876: result = 0;
! 877: outfile = sort_strdup("-");
! 878: real_outfile = NULL;
! 879:
! 880: struct sort_mods *sm = &default_sort_mods_object;
! 881:
! 882: init_tmp_files();
! 883:
! 884: set_signal_handler();
! 885:
! 886: set_hw_params();
! 887: set_locale();
! 888: set_tmpdir();
! 889: set_sort_opts();
! 890:
! 891: fix_obsolete_keys(&argc, argv);
! 892:
! 893: while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
! 894: != -1)) {
! 895:
! 896: check_mutually_exclusive_flags(c, mef_flags);
! 897:
! 898: if (!set_sort_modifier(sm, c)) {
! 899:
! 900: switch (c) {
! 901: case 'c':
! 902: sort_opts_vals.cflag = true;
! 903: if (optarg) {
! 904: if (!strcmp(optarg, "diagnose-first"))
! 905: ;
! 906: else if (!strcmp(optarg, "silent") ||
! 907: !strcmp(optarg, "quiet"))
! 908: sort_opts_vals.csilentflag = true;
! 909: else if (*optarg)
! 910: unknown(optarg);
! 911: }
! 912: break;
! 913: case 'C':
! 914: sort_opts_vals.cflag = true;
! 915: sort_opts_vals.csilentflag = true;
! 916: break;
! 917: case 'k':
! 918: {
! 919: sort_opts_vals.complex_sort = true;
! 920: sort_opts_vals.kflag = true;
! 921:
! 922: keys_num++;
! 923: keys = sort_reallocarray(keys, keys_num,
! 924: sizeof(struct key_specs));
! 925: memset(&(keys[keys_num - 1]), 0,
! 926: sizeof(struct key_specs));
! 927:
! 928: if (parse_k(optarg, &(keys[keys_num - 1]))
! 929: < 0) {
! 930: errc(2, EINVAL, "-k %s", optarg);
! 931: }
! 932:
! 933: break;
! 934: }
! 935: case 'm':
! 936: sort_opts_vals.mflag = true;
! 937: break;
! 938: case 'o':
! 939: sort_free(outfile);
! 940: outfile = sort_strdup(optarg);
! 941: break;
! 942: case 's':
! 943: sort_opts_vals.sflag = true;
! 944: break;
! 945: case 'S':
! 946: available_free_memory =
! 947: parse_memory_buffer_value(optarg);
! 948: break;
! 949: case 'T':
! 950: tmpdir = sort_strdup(optarg);
! 951: break;
! 952: case 't':
! 953: while (strlen(optarg) > 1) {
! 954: if (optarg[0] != '\\') {
! 955: errc(2, EINVAL, "%s", optarg);
! 956: }
! 957: optarg += 1;
! 958: if (*optarg == '0') {
! 959: *optarg = 0;
! 960: break;
! 961: }
! 962: }
! 963: sort_opts_vals.tflag = true;
! 964: sort_opts_vals.field_sep = btowc(optarg[0]);
! 965: if (sort_opts_vals.field_sep == WEOF) {
! 966: errno = EINVAL;
! 967: err(2, NULL);
! 968: }
! 969: if (!gnusort_numeric_compatibility) {
! 970: if (symbol_decimal_point == sort_opts_vals.field_sep)
! 971: symbol_decimal_point = WEOF;
! 972: if (symbol_thousands_sep == sort_opts_vals.field_sep)
! 973: symbol_thousands_sep = WEOF;
! 974: if (symbol_negative_sign == sort_opts_vals.field_sep)
! 975: symbol_negative_sign = WEOF;
! 976: if (symbol_positive_sign == sort_opts_vals.field_sep)
! 977: symbol_positive_sign = WEOF;
! 978: }
! 979: break;
! 980: case 'u':
! 981: sort_opts_vals.uflag = true;
! 982: /* stable sort for the correct unique val */
! 983: sort_opts_vals.sflag = true;
! 984: break;
! 985: case 'z':
! 986: sort_opts_vals.zflag = true;
! 987: break;
! 988: case SORT_OPT:
! 989: if (optarg) {
! 990: if (!strcmp(optarg, "general-numeric"))
! 991: set_sort_modifier(sm, 'g');
! 992: else if (!strcmp(optarg, "human-numeric"))
! 993: set_sort_modifier(sm, 'h');
! 994: else if (!strcmp(optarg, "numeric"))
! 995: set_sort_modifier(sm, 'n');
! 996: else if (!strcmp(optarg, "month"))
! 997: set_sort_modifier(sm, 'M');
! 998: else if (!strcmp(optarg, "random"))
! 999: set_sort_modifier(sm, 'R');
! 1000: else
! 1001: unknown(optarg);
! 1002: }
! 1003: break;
! 1004: case QSORT_OPT:
! 1005: sort_opts_vals.sort_method = SORT_QSORT;
! 1006: break;
! 1007: case 'H':
! 1008: sort_opts_vals.sort_method = SORT_MERGESORT;
! 1009: break;
! 1010: case MMAP_OPT:
! 1011: use_mmap = true;
! 1012: break;
! 1013: case HEAPSORT_OPT:
! 1014: sort_opts_vals.sort_method = SORT_HEAPSORT;
! 1015: break;
! 1016: case RADIXSORT_OPT:
! 1017: sort_opts_vals.sort_method = SORT_RADIXSORT;
! 1018: break;
! 1019: case RANDOMSOURCE_OPT:
! 1020: random_source = strdup(optarg);
! 1021: break;
! 1022: case COMPRESSPROGRAM_OPT:
! 1023: compress_program = strdup(optarg);
! 1024: break;
! 1025: case FF_OPT:
! 1026: read_fns_from_file0(optarg);
! 1027: break;
! 1028: case BS_OPT:
! 1029: {
! 1030: errno = 0;
! 1031: long mof = strtol(optarg, NULL, 10);
! 1032: if (errno != 0)
! 1033: err(2, "--batch-size");
! 1034: if (mof >= 2)
! 1035: max_open_files = (size_t) mof + 1;
! 1036: }
! 1037: break;
! 1038: case VERSION_OPT:
! 1039: printf("%s\n", VERSION);
! 1040: exit(EXIT_SUCCESS);
! 1041: /* NOTREACHED */
! 1042: break;
! 1043: case DEBUG_OPT:
! 1044: debug_sort = true;
! 1045: break;
! 1046: case HELP_OPT:
! 1047: usage(0);
! 1048: /* NOTREACHED */
! 1049: break;
! 1050: default:
! 1051: usage(2);
! 1052: /* NOTREACHED */
! 1053: }
! 1054: }
! 1055: }
! 1056:
! 1057: argc -= optind;
! 1058: argv += optind;
! 1059:
! 1060: if (sort_opts_vals.cflag && sort_opts_vals.mflag)
! 1061: errx(1, "%c:%c: mutually exclusive flags", 'm', 'c');
! 1062:
! 1063: if (keys_num == 0) {
! 1064: keys_num = 1;
! 1065: keys = sort_realloc(keys, sizeof(struct key_specs));
! 1066: memset(&(keys[0]), 0, sizeof(struct key_specs));
! 1067: keys[0].c1 = 1;
! 1068: keys[0].pos1b = default_sort_mods->bflag;
! 1069: keys[0].pos2b = default_sort_mods->bflag;
! 1070: memcpy(&(keys[0].sm), default_sort_mods,
! 1071: sizeof(struct sort_mods));
! 1072: }
! 1073:
! 1074: for (i = 0; i < keys_num; i++) {
! 1075: struct key_specs *ks;
! 1076:
! 1077: ks = &(keys[i]);
! 1078:
! 1079: if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
! 1080: !(ks->pos2b)) {
! 1081: ks->pos1b = sm->bflag;
! 1082: ks->pos2b = sm->bflag;
! 1083: memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
! 1084: }
! 1085:
! 1086: ks->sm.func = get_sort_func(&(ks->sm));
! 1087: }
! 1088:
! 1089: if (argv_from_file0) {
! 1090: argc = argc_from_file0;
! 1091: argv = argv_from_file0;
! 1092: }
! 1093:
! 1094: if (debug_sort) {
! 1095: printf("Memory to be used for sorting: %llu\n",
! 1096: available_free_memory);
! 1097: printf("Using collate rules of %s locale\n",
! 1098: setlocale(LC_COLLATE, NULL));
! 1099: if (byte_sort)
! 1100: printf("Byte sort is used\n");
! 1101: if (print_symbols_on_debug) {
! 1102: printf("Decimal Point: <%lc>\n", symbol_decimal_point);
! 1103: if (symbol_thousands_sep)
! 1104: printf("Thousands separator: <%lc>\n",
! 1105: symbol_thousands_sep);
! 1106: printf("Positive sign: <%lc>\n", symbol_positive_sign);
! 1107: printf("Negative sign: <%lc>\n", symbol_negative_sign);
! 1108: }
! 1109: }
! 1110:
! 1111: set_random_seed();
1.4 millert 1112:
1.44 ! millert 1113: /* Case when the outfile equals one of the input files: */
! 1114: if (strcmp(outfile, "-")) {
! 1115: int i;
! 1116:
! 1117: for (i = 0; i < argc; ++i) {
! 1118: if (strcmp(argv[i], outfile) == 0) {
! 1119: real_outfile = sort_strdup(outfile);
! 1120: for (;;) {
! 1121: const size_t size = strlen(outfile) + strlen(".tmp") + 1;
! 1122: outfile = sort_realloc(outfile, size);
! 1123: strlcat(outfile, ".tmp", size);
! 1124: if (access(outfile, F_OK) < 0)
! 1125: break;
! 1126: }
! 1127: tmp_file_atexit(outfile);
! 1128: }
! 1129: }
! 1130: }
! 1131:
! 1132: if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
! 1133: struct file_list fl;
! 1134: struct sort_list list;
! 1135:
! 1136: sort_list_init(&list);
! 1137: file_list_init(&fl, true);
! 1138:
! 1139: if (argc < 1)
! 1140: procfile("-", &list, &fl);
! 1141: else {
! 1142: while (argc > 0) {
! 1143: procfile(*argv, &list, &fl);
! 1144: --argc;
! 1145: ++argv;
! 1146: }
! 1147: }
! 1148:
! 1149: if (fl.count < 1)
! 1150: sort_list_to_file(&list, outfile);
! 1151: else {
! 1152: if (list.count > 0) {
! 1153: char *flast = new_tmp_file_name();
! 1154:
! 1155: sort_list_to_file(&list, flast);
! 1156: file_list_add(&fl, flast, false);
! 1157: }
! 1158: merge_files(&fl, outfile);
! 1159: }
! 1160:
! 1161: file_list_clean(&fl);
! 1162:
! 1163: /*
! 1164: * We are about to exit the program, so we can ignore
! 1165: * the clean-up for speed
! 1166: *
! 1167: * sort_list_clean(&list);
! 1168: */
! 1169:
! 1170: } else if (sort_opts_vals.cflag) {
! 1171: result = (argc == 0) ? (check("-")) : (check(*argv));
! 1172: } else if (sort_opts_vals.mflag) {
! 1173: struct file_list fl;
! 1174:
! 1175: file_list_init(&fl, false);
! 1176: file_list_populate(&fl, argc, argv, true);
! 1177: merge_files(&fl, outfile);
! 1178: file_list_clean(&fl);
! 1179: }
! 1180:
! 1181: if (real_outfile) {
! 1182: unlink(real_outfile);
! 1183: if (rename(outfile, real_outfile) < 0)
! 1184: err(2, "%s", real_outfile);
! 1185: sort_free(real_outfile);
! 1186: }
! 1187:
! 1188: sort_free(outfile);
1.4 millert 1189:
1.44 ! millert 1190: return result;
1.1 millert 1191: }