Annotation of src/usr.bin/sort/coll.c, Revision 1.9
1.9 ! millert 1: /* $OpenBSD: coll.c,v 1.8 2015/04/02 22:14:51 deraadt Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
30: #include <sys/types.h>
31:
32: #include <errno.h>
33: #include <err.h>
34: #include <langinfo.h>
35: #include <limits.h>
36: #include <math.h>
37: #include <md5.h>
38: #include <stdlib.h>
39: #include <string.h>
40: #include <wchar.h>
41: #include <wctype.h>
42:
43: #include "coll.h"
44: #include "vsort.h"
45:
46: struct key_specs *keys;
47: size_t keys_num = 0;
48:
49: wint_t symbol_decimal_point = L'.';
50: /* there is no default thousands separator in collate rules: */
51: wint_t symbol_thousands_sep = 0;
52: wint_t symbol_negative_sign = L'-';
53: wint_t symbol_positive_sign = L'+';
54:
55: static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
56: static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
57: static int monthcoll(struct key_value*, struct key_value *, size_t offset);
58: static int numcoll(struct key_value*, struct key_value *, size_t offset);
59: static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
60: static int randomcoll(struct key_value*, struct key_value *, size_t offset);
61: static int versioncoll(struct key_value*, struct key_value *, size_t offset);
62:
63: /*
64: * Allocate keys array
65: */
66: struct keys_array *
67: keys_array_alloc(void)
68: {
69: struct keys_array *ka;
70: size_t sz;
71:
72: sz = keys_array_size();
1.2 millert 73: ka = sort_calloc(1, sz);
1.1 millert 74:
75: return ka;
76: }
77:
78: /*
1.8 deraadt 79: * Calculate whether we need key hint space
1.1 millert 80: */
81: static size_t
82: key_hint_size(void)
83: {
84: return need_hint ? sizeof(struct key_hint) : 0;
85: }
86:
87: /*
88: * Calculate keys array size
89: */
90: size_t
91: keys_array_size(void)
92: {
93: return keys_num * (sizeof(struct key_value) + key_hint_size());
94: }
95:
96: /*
97: * Clean data of keys array
98: */
99: void
100: clean_keys_array(const struct bwstring *s, struct keys_array *ka)
101: {
102: if (ka) {
103: size_t i;
104:
105: for (i = 0; i < keys_num; ++i)
106: if (ka->key[i].k && ka->key[i].k != s)
107: bwsfree(ka->key[i].k);
108: memset(ka, 0, keys_array_size());
109: }
110: }
111:
112: /*
113: * Set value of a key in the keys set
114: */
115: void
116: set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
117: {
118: if (ka && keys_num > ind) {
119: struct key_value *kv;
120:
121: kv = &(ka->key[ind]);
122:
1.7 tobias 123: if (kv->k != s)
1.1 millert 124: bwsfree(kv->k);
125: kv->k = s;
126: }
127: }
128:
129: /*
130: * Initialize a sort list item
131: */
132: struct sort_list_item *
133: sort_list_item_alloc(void)
134: {
135: struct sort_list_item *si;
136: size_t sz;
137:
138: sz = sizeof(struct sort_list_item) + keys_array_size();
1.2 millert 139: si = sort_calloc(1, sz);
1.1 millert 140:
141: return si;
142: }
143:
144: size_t
145: sort_list_item_size(struct sort_list_item *si)
146: {
147: size_t i, ret = 0;
148:
149: if (si) {
150: ret = sizeof(struct sort_list_item) + keys_array_size();
151: if (si->str)
152: ret += bws_memsize(si->str);
153: for (i = 0; i < keys_num; ++i) {
154: struct key_value *kv;
155:
156: kv = &(si->ka.key[i]);
157:
158: if (kv->k != si->str)
159: ret += bws_memsize(kv->k);
160: }
161: }
162: return ret;
163: }
164:
165: /*
166: * Calculate key for a sort list item
167: */
168: static void
169: sort_list_item_make_key(struct sort_list_item *si)
170: {
171: preproc(si->str, &(si->ka));
172: }
173:
174: /*
175: * Set value of a sort list item.
176: * Return combined string and keys memory size.
177: */
178: void
179: sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
180: {
181: if (si) {
182: clean_keys_array(si->str, &(si->ka));
183: if (si->str) {
184: if (si->str == str) {
185: /* we are trying to reset the same string */
186: return;
187: } else {
188: bwsfree(si->str);
189: si->str = NULL;
190: }
191: }
192: si->str = str;
193: sort_list_item_make_key(si);
194: }
195: }
196:
197: /*
198: * De-allocate a sort list item object memory
199: */
200: void
201: sort_list_item_clean(struct sort_list_item *si)
202: {
203: if (si) {
204: clean_keys_array(si->str, &(si->ka));
205: if (si->str) {
206: bwsfree(si->str);
207: si->str = NULL;
208: }
209: }
210: }
211:
212: /*
213: * Skip columns according to specs
214: */
215: static size_t
216: skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
217: bool skip_blanks, bool *empty_key)
218: {
219: if (cols < 1)
220: return BWSLEN(s) + 1;
221:
222: if (skip_blanks)
223: while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
224: ++start;
225:
226: while (start < BWSLEN(s) && cols > 1) {
227: --cols;
228: ++start;
229: }
230:
231: if (start >= BWSLEN(s))
232: *empty_key = true;
233:
234: return start;
235: }
236:
237: /*
238: * Skip fields according to specs
239: */
240: static size_t
241: skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
242: {
243: if (fields < 2) {
244: if (BWSLEN(s) == 0)
245: *empty_field = true;
246: return 0;
247: } else if (!(sort_opts_vals.tflag)) {
248: size_t cpos = 0;
249: bool pb = true;
250:
251: while (cpos < BWSLEN(s)) {
252: bool isblank;
253:
254: isblank = iswblank(BWS_GET(s, cpos));
255:
256: if (isblank && !pb) {
257: --fields;
258: if (fields <= 1)
259: return cpos;
260: }
261: pb = isblank;
262: ++cpos;
263: }
264: if (fields > 1)
265: *empty_field = true;
266: return cpos;
267: } else {
268: size_t cpos = 0;
269:
270: while (cpos < BWSLEN(s)) {
271: if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
272: --fields;
273: if (fields <= 1)
274: return cpos + 1;
275: }
276: ++cpos;
277: }
278: if (fields > 1)
279: *empty_field = true;
280: return cpos;
281: }
282: }
283:
284: /*
285: * Find fields start
286: */
287: static void
288: find_field_start(const struct bwstring *s, struct key_specs *ks,
289: size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
290: {
291: *field_start = skip_fields_to_start(s, ks->f1, empty_field);
292: if (!*empty_field)
293: *key_start = skip_cols_to_start(s, ks->c1, *field_start,
294: ks->pos1b, empty_key);
295: else
296: *empty_key = true;
297: }
298:
299: /*
300: * Find end key position
301: */
302: static size_t
303: find_field_end(const struct bwstring *s, struct key_specs *ks)
304: {
305: size_t f2, next_field_start, pos_end;
306: bool empty_field, empty_key;
307:
308: empty_field = false;
309: empty_key = false;
310: f2 = ks->f2;
311:
312: if (f2 == 0)
313: return BWSLEN(s) + 1;
314: else {
315: if (ks->c2 == 0) {
316: next_field_start = skip_fields_to_start(s, f2 + 1,
317: &empty_field);
318: if ((next_field_start > 0) && sort_opts_vals.tflag &&
319: ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
320: next_field_start - 1)))
321: --next_field_start;
322: } else
323: next_field_start = skip_fields_to_start(s, f2,
324: &empty_field);
325: }
326:
327: if (empty_field || (next_field_start >= BWSLEN(s)))
328: return BWSLEN(s) + 1;
329:
330: if (ks->c2) {
331: pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
332: ks->pos2b, &empty_key);
333: if (pos_end < BWSLEN(s))
334: ++pos_end;
335: } else
336: pos_end = next_field_start;
337:
338: return pos_end;
339: }
340:
341: /*
342: * Cut a field according to the key specs
343: */
344: static struct bwstring *
345: cut_field(const struct bwstring *s, struct key_specs *ks)
346: {
347: struct bwstring *ret = NULL;
348:
349: if (s && ks) {
350: size_t field_start, key_end, key_start, sz;
351: bool empty_field, empty_key;
352:
353: field_start = 0;
354: key_start = 0;
355: empty_field = false;
356: empty_key = false;
357:
358: find_field_start(s, ks, &field_start, &key_start,
359: &empty_field, &empty_key);
360:
361: if (empty_key)
362: sz = 0;
363: else {
364: key_end = find_field_end(s, ks);
365: sz = (key_end < key_start) ? 0 : (key_end - key_start);
366: }
367:
368: ret = bwsalloc(sz);
369: if (sz)
370: bwsnocpy(ret, s, key_start, sz);
371: } else
372: ret = bwsalloc(0);
373:
374: return ret;
375: }
376:
377: /*
378: * Preprocesses a line applying the necessary transformations
379: * specified by command line options and returns the preprocessed
380: * string, which can be used to compare.
381: */
382: int
383: preproc(struct bwstring *s, struct keys_array *ka)
384: {
385: if (sort_opts_vals.kflag) {
386: size_t i;
387: for (i = 0; i < keys_num; i++) {
388: struct bwstring *key;
389: struct key_specs *kspecs;
390: struct sort_mods *sm;
391:
392: kspecs = &(keys[i]);
393: key = cut_field(s, kspecs);
394:
395: sm = &(kspecs->sm);
396: if (sm->dflag)
397: key = dictionary_order(key);
398: else if (sm->iflag)
399: key = ignore_nonprinting(key);
400: if (sm->fflag || sm->Mflag)
401: key = ignore_case(key);
402:
403: set_key_on_keys_array(ka, key, i);
404: }
405: } else {
406: struct bwstring *ret = NULL;
407: struct sort_mods *sm = default_sort_mods;
408:
1.9 ! millert 409: #ifdef GNUSORT_COMPATIBILITY
1.1 millert 410: if (sm->bflag) {
411: if (ret == NULL)
412: ret = bwsdup(s);
413: ret = ignore_leading_blanks(ret);
414: }
1.9 ! millert 415: #endif
1.1 millert 416: if (sm->dflag) {
417: if (ret == NULL)
418: ret = bwsdup(s);
419: ret = dictionary_order(ret);
420: } else if (sm->iflag) {
421: if (ret == NULL)
422: ret = bwsdup(s);
423: ret = ignore_nonprinting(ret);
424: }
425: if (sm->fflag || sm->Mflag) {
426: if (ret == NULL)
427: ret = bwsdup(s);
428: ret = ignore_case(ret);
429: }
430: if (ret == NULL)
431: set_key_on_keys_array(ka, s, 0);
432: else
433: set_key_on_keys_array(ka, ret, 0);
434: }
435:
436: return 0;
437: }
438:
439: cmpcoll_t
440: get_sort_func(struct sort_mods *sm)
441: {
442: if (sm->nflag)
443: return numcoll;
444: else if (sm->hflag)
445: return hnumcoll;
446: else if (sm->gflag)
447: return gnumcoll;
448: else if (sm->Mflag)
449: return monthcoll;
450: else if (sm->Rflag)
451: return randomcoll;
452: else if (sm->Vflag)
453: return versioncoll;
454: else
455: return wstrcoll;
456: }
457:
458: /*
459: * Compares the given strings. Returns a positive number if
460: * the first precedes the second, a negative number if the second is
461: * the preceding one, and zero if they are equal. This function calls
462: * the underlying collate functions, which done the actual comparison.
463: */
464: int
465: key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
466: {
467: struct sort_mods *sm;
468: int res = 0;
469: size_t i;
470:
471: for (i = 0; i < keys_num; ++i) {
472: sm = &(keys[i].sm);
473:
474: if (sm->rflag)
475: res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
476: else
477: res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
478:
479: if (res)
480: break;
481:
482: /* offset applies to only the first key */
483: offset = 0;
484: }
485: return res;
486: }
487:
488: /*
489: * Compare two strings.
490: * Plain symbol-by-symbol comparison.
491: */
492: int
493: top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
494: {
495: if (default_sort_mods->rflag) {
496: const struct bwstring *tmp;
497:
498: tmp = s1;
499: s1 = s2;
500: s2 = tmp;
501: }
502:
503: return bwscoll(s1, s2, 0);
504: }
505:
506: /*
507: * Compare a string and a sort list item, according to the sort specs.
508: */
509: int
510: str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
511: {
512: struct keys_array *ka1;
513: int ret = 0;
514:
515: ka1 = keys_array_alloc();
516:
517: preproc(str1, ka1);
518:
519: sort_list_item_make_key(*ss2);
520:
521: if (debug_sort) {
522: bwsprintf(stdout, str1, "; s1=<", ">");
523: bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
524: }
525:
526: ret = key_coll(ka1, &((*ss2)->ka), 0);
527:
528: if (debug_sort)
529: printf("; cmp1=%d", ret);
530:
531: clean_keys_array(str1, ka1);
532: sort_free(ka1);
533:
534: if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
535: ret = top_level_str_coll(str1, ((*ss2)->str));
536: if (debug_sort)
537: printf("; cmp2=%d", ret);
538: }
539:
540: if (debug_sort)
541: putchar('\n');
542:
543: return ret;
544: }
545:
546: /*
547: * Compare two sort list items, according to the sort specs.
548: */
549: int
550: list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
551: size_t offset)
552: {
553: int ret;
554:
555: ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
556:
557: if (debug_sort) {
558: if (offset)
559: printf("; offset=%d", (int) offset);
560: bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
561: bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
562: printf("; cmp1=%d\n", ret);
563: }
564:
565: if (ret)
566: return ret;
567:
568: if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
569: ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
570: if (debug_sort)
571: printf("; cmp2=%d\n", ret);
572: }
573:
574: return ret;
575: }
576:
577: /*
578: * Compare two sort list items, according to the sort specs.
579: */
580: int
581: list_coll(const void *ss1, const void *ss2)
582: {
583: return list_coll_offset((struct sort_list_item **)ss1,
584: (struct sort_list_item **)ss2, 0);
585: }
586:
1.8 deraadt 587: #define LSCDEF(N) \
588: static int \
1.1 millert 589: list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
590: { \
591: \
592: return list_coll_offset(ss1, ss2, N); \
593: }
594:
595: LSCDEF(0)
596: LSCDEF(1)
597: LSCDEF(2)
598: LSCDEF(3)
599: LSCDEF(4)
600: LSCDEF(5)
601: LSCDEF(6)
602: LSCDEF(7)
603: LSCDEF(8)
604: LSCDEF(9)
605: LSCDEF(10)
606: LSCDEF(11)
607: LSCDEF(12)
608: LSCDEF(13)
609: LSCDEF(14)
610: LSCDEF(15)
611: LSCDEF(16)
612: LSCDEF(17)
613: LSCDEF(18)
614: LSCDEF(19)
615: LSCDEF(20)
616:
617: listcoll_t
618: get_list_call_func(size_t offset)
619: {
620: static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
621: list_coll_2, list_coll_3, list_coll_4, list_coll_5,
622: list_coll_6, list_coll_7, list_coll_8, list_coll_9,
623: list_coll_10, list_coll_11, list_coll_12, list_coll_13,
624: list_coll_14, list_coll_15, list_coll_16, list_coll_17,
625: list_coll_18, list_coll_19, list_coll_20 };
626:
627: if (offset <= 20)
628: return lsarray[offset];
629:
630: return list_coll_0;
631: }
632:
633: /*
634: * Compare two sort list items, only by their original string.
635: */
636: int
637: list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
638: {
639: return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
640: }
641:
642: /*
643: * Maximum size of a number in the string (before or after decimal point)
644: */
645: #define MAX_NUM_SIZE (128)
646:
647: /*
648: * Set suffix value
649: */
650: static void
651: setsuffix(wchar_t c, unsigned char *si)
652: {
653: switch (c){
654: case L'k':
655: case L'K':
656: *si = 1;
657: break;
658: case L'M':
659: *si = 2;
660: break;
661: case L'G':
662: *si = 3;
663: break;
664: case L'T':
665: *si = 4;
666: break;
667: case L'P':
668: *si = 5;
669: break;
670: case L'E':
671: *si = 6;
672: break;
673: case L'Z':
674: *si = 7;
675: break;
676: case L'Y':
677: *si = 8;
678: break;
679: default:
680: *si = 0;
681: };
682: }
683:
684: /*
685: * Read string s and parse the string into a fixed-decimal-point number.
686: * sign equals -1 if the number is negative (explicit plus is not allowed,
687: * according to GNU sort's "info sort".
688: * The number part before decimal point is in the smain, after the decimal
689: * point is in sfrac, tail is the pointer to the remainder of the string.
690: */
691: static int
692: read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
693: {
694: bwstring_iterator s;
695:
696: s = bws_begin(s0);
697:
698: /* always end the fraction with zero, even if we have no fraction */
699: sfrac[0] = 0;
700:
701: while (iswblank(bws_get_iter_value(s)))
702: s = bws_iterator_inc(s, 1);
703:
704: if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
705: *sign = -1;
706: s = bws_iterator_inc(s, 1);
707: }
708:
709: // This is '0', not '\0', do not change this
710: while (iswdigit(bws_get_iter_value(s)) &&
711: (bws_get_iter_value(s) == L'0'))
712: s = bws_iterator_inc(s, 1);
713:
714: while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
715: if (iswdigit(bws_get_iter_value(s))) {
716: smain[*main_len] = bws_get_iter_value(s);
717: s = bws_iterator_inc(s, 1);
718: *main_len += 1;
719: } else if (symbol_thousands_sep &&
720: (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
721: s = bws_iterator_inc(s, 1);
722: else
723: break;
724: }
725:
726: smain[*main_len] = 0;
727:
728: if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
729: s = bws_iterator_inc(s, 1);
730: while (iswdigit(bws_get_iter_value(s)) &&
731: *frac_len < MAX_NUM_SIZE) {
732: sfrac[*frac_len] = bws_get_iter_value(s);
733: s = bws_iterator_inc(s, 1);
734: *frac_len += 1;
735: }
736: sfrac[*frac_len] = 0;
737:
738: while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
739: --(*frac_len);
740: sfrac[*frac_len] = L'\0';
741: }
742: }
743:
744: setsuffix(bws_get_iter_value(s), si);
745:
746: if ((*main_len + *frac_len) == 0)
747: *sign = 0;
748:
749: return 0;
750: }
751:
752: /*
753: * Implements string sort.
754: */
755: static int
756: wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
757: {
758:
759: if (debug_sort) {
760: if (offset)
761: printf("; offset=%d\n", (int) offset);
762: bwsprintf(stdout, kv1->k, "; k1=<", ">");
763: printf("(%zu)", BWSLEN(kv1->k));
764: bwsprintf(stdout, kv2->k, ", k2=<", ">");
765: printf("(%zu)", BWSLEN(kv2->k));
766: }
767:
768: return bwscoll(kv1->k, kv2->k, offset);
769: }
770:
771: /*
772: * Compare two suffixes
773: */
774: static inline int
775: cmpsuffix(unsigned char si1, unsigned char si2)
776: {
777: return (char)si1 - (char)si2;
778: }
779:
780: /*
781: * Implements numeric sort for -n and -h.
782: */
783: static int
784: numcoll_impl(struct key_value *kv1, struct key_value *kv2,
785: size_t offset __unused, bool use_suffix)
786: {
787: struct bwstring *s1, *s2;
788: wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
789: wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
790: int cmp_res, sign1, sign2;
791: size_t frac1, frac2, main1, main2;
792: unsigned char SI1, SI2;
793: bool e1, e2, key1_read, key2_read;
794:
795: s1 = kv1->k;
796: s2 = kv2->k;
797: sign1 = sign2 = 0;
798: main1 = main2 = 0;
799: frac1 = frac2 = 0;
800:
801: key1_read = key2_read = false;
802:
803: if (debug_sort) {
804: bwsprintf(stdout, s1, "; k1=<", ">");
805: bwsprintf(stdout, s2, ", k2=<", ">");
806: }
807:
808: if (s1 == s2)
809: return 0;
810:
811: if (kv1->hint->status == HS_UNINITIALIZED) {
812: /* read the number from the string */
813: read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
814: key1_read = true;
815: kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
816: if (main1 < 1 && frac1 < 1)
817: kv1->hint->v.nh.empty=true;
818: kv1->hint->v.nh.si = SI1;
819: kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
820: HS_INITIALIZED : HS_ERROR;
821: kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
822: }
823:
824: if (kv2->hint->status == HS_UNINITIALIZED) {
825: /* read the number from the string */
826: read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
827: key2_read = true;
828: kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
829: if (main2 < 1 && frac2 < 1)
830: kv2->hint->v.nh.empty=true;
831: kv2->hint->v.nh.si = SI2;
832: kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
833: HS_INITIALIZED : HS_ERROR;
834: kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
835: }
836:
837: if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
838: HS_INITIALIZED) {
839: unsigned long long n1, n2;
840: bool neg1, neg2;
841:
842: e1 = kv1->hint->v.nh.empty;
843: e2 = kv2->hint->v.nh.empty;
844:
845: if (e1 && e2)
846: return 0;
847:
848: neg1 = kv1->hint->v.nh.neg;
849: neg2 = kv2->hint->v.nh.neg;
850:
851: if (neg1 && !neg2)
852: return -1;
853: if (neg2 && !neg1)
854: return 1;
855:
856: if (e1)
857: return neg2 ? 1 : -1;
858: else if (e2)
859: return neg1 ? -1 : 1;
860:
861:
862: if (use_suffix) {
863: cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
864: if (cmp_res)
865: return neg1 ? -cmp_res : cmp_res;
866: }
867:
868: n1 = kv1->hint->v.nh.n1;
869: n2 = kv2->hint->v.nh.n1;
870: if (n1 < n2)
871: return neg1 ? 1 : -1;
872: else if (n1 > n2)
873: return neg1 ? -1 : 1;
874: }
875:
876: /* read the numbers from the strings */
877: if (!key1_read)
878: read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
879: if (!key2_read)
880: read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
881:
882: e1 = ((main1 + frac1) == 0);
883: e2 = ((main2 + frac2) == 0);
884:
885: if (e1 && e2)
886: return 0;
887:
888: /* we know the result if the signs are different */
889: if (sign1 < 0 && sign2 >= 0)
890: return -1;
891: if (sign1 >= 0 && sign2 < 0)
892: return 1;
893:
894: if (e1)
895: return (sign2 < 0) ? +1 : -1;
896: else if (e2)
897: return (sign1 < 0) ? -1 : 1;
898:
899: if (use_suffix) {
900: cmp_res = cmpsuffix(SI1, SI2);
901: if (cmp_res)
902: return (sign1 < 0) ? -cmp_res : cmp_res;
903: }
904:
905: /* if both numbers are empty assume that the strings are equal */
906: if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
907: return 0;
908:
909: /*
910: * if the main part is of different size, we know the result
911: * (because the leading zeros are removed)
912: */
913: if (main1 < main2)
914: cmp_res = -1;
915: else if (main1 > main2)
916: cmp_res = +1;
917: /* if the sizes are equal then simple non-collate string compare gives the correct result */
918: else
919: cmp_res = wcscmp(smain1, smain2);
920:
921: /* check fraction */
922: if (!cmp_res)
923: cmp_res = wcscmp(sfrac1, sfrac2);
924:
925: if (!cmp_res)
926: return 0;
927:
928: /* reverse result if the signs are negative */
929: if (sign1 < 0 && sign2 < 0)
930: cmp_res = -cmp_res;
931:
932: return cmp_res;
933: }
934:
935: /*
936: * Implements numeric sort (-n).
937: */
938: static int
939: numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
940: {
941: return numcoll_impl(kv1, kv2, offset, false);
942: }
943:
944: /*
945: * Implements 'human' numeric sort (-h).
946: */
947: static int
948: hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
949: {
950: return numcoll_impl(kv1, kv2, offset, true);
951: }
952:
953: /*
954: * Implements random sort (-R).
955: */
956: static int
957: randomcoll(struct key_value *kv1, struct key_value *kv2,
958: size_t offset __unused)
959: {
960: struct bwstring *s1, *s2;
961: MD5_CTX ctx1, ctx2;
962: char *b1, *b2;
963:
964: s1 = kv1->k;
965: s2 = kv2->k;
966:
967: if (debug_sort) {
968: bwsprintf(stdout, s1, "; k1=<", ">");
969: bwsprintf(stdout, s2, ", k2=<", ">");
970: }
971:
972: if (s1 == s2)
973: return 0;
974:
975: memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
976: memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
977:
978: MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
979: MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
980: b1 = MD5End(&ctx1, NULL);
981: b2 = MD5End(&ctx2, NULL);
982: if (b1 == NULL) {
983: if (b2 == NULL)
984: return 0;
985: else {
986: sort_free(b2);
987: return -1;
988: }
989: } else if (b2 == NULL) {
990: sort_free(b1);
991: return 1;
992: } else {
993: int cmp_res;
994:
995: cmp_res = strcmp(b1, b2);
996: sort_free(b1);
997: sort_free(b2);
998:
999: if (!cmp_res)
1000: cmp_res = bwscoll(s1, s2, 0);
1001:
1002: return cmp_res;
1003: }
1004: }
1005:
1006: /*
1007: * Implements version sort (-V).
1008: */
1009: static int
1010: versioncoll(struct key_value *kv1, struct key_value *kv2,
1011: size_t offset __unused)
1012: {
1013: struct bwstring *s1, *s2;
1014:
1015: s1 = kv1->k;
1016: s2 = kv2->k;
1017:
1018: if (debug_sort) {
1019: bwsprintf(stdout, s1, "; k1=<", ">");
1020: bwsprintf(stdout, s2, ", k2=<", ">");
1021: }
1022:
1023: if (s1 == s2)
1024: return 0;
1025:
1026: return vcmp(s1, s2);
1027: }
1028:
1029: /*
1030: * Check for minus infinity
1031: */
1032: static inline bool
1033: huge_minus(double d, int err1)
1034: {
1035: if (err1 == ERANGE)
1036: if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1037: return 1;
1038:
1039: return 0;
1040: }
1041:
1042: /*
1043: * Check for plus infinity
1044: */
1045: static inline bool
1046: huge_plus(double d, int err1)
1047: {
1048: if (err1 == ERANGE)
1049: if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1050: return 1;
1051:
1052: return 0;
1053: }
1054:
1055: /*
1056: * Check whether a function is a NAN
1057: */
1058: static bool
1059: is_nan(double d)
1060: {
1.3 miod 1061: #if defined(NAN)
1.1 millert 1062: return (d == NAN || isnan(d));
1.3 miod 1063: #else
1064: return (isnan(d));
1065: #endif
1.1 millert 1066: }
1067:
1068: /*
1069: * Compare two NANs
1070: */
1071: static int
1072: cmp_nans(double d1, double d2)
1073: {
1074: if (d1 == d2)
1075: return 0;
1076: return d1 < d2 ? -1 : 1;
1077: }
1078:
1079: /*
1080: * Implements general numeric sort (-g).
1081: */
1082: static int
1083: gnumcoll(struct key_value *kv1, struct key_value *kv2,
1084: size_t offset __unused)
1085: {
1086: double d1, d2;
1087: int err1, err2;
1088: bool empty1, empty2, key1_read, key2_read;
1089:
1090: d1 = d2 = 0;
1091: err1 = err2 = 0;
1092: key1_read = key2_read = false;
1093:
1094: if (debug_sort) {
1095: bwsprintf(stdout, kv1->k, "; k1=<", ">");
1096: bwsprintf(stdout, kv2->k, "; k2=<", ">");
1097: }
1098:
1099: if (kv1->hint->status == HS_UNINITIALIZED) {
1100: errno = 0;
1101: d1 = bwstod(kv1->k, &empty1);
1102: err1 = errno;
1103:
1104: if (empty1)
1105: kv1->hint->v.gh.notnum = true;
1106: else if (err1 == 0) {
1107: kv1->hint->v.gh.d = d1;
1108: kv1->hint->v.gh.nan = is_nan(d1);
1109: kv1->hint->status = HS_INITIALIZED;
1110: } else
1111: kv1->hint->status = HS_ERROR;
1112:
1113: key1_read = true;
1114: }
1115:
1116: if (kv2->hint->status == HS_UNINITIALIZED) {
1117: errno = 0;
1118: d2 = bwstod(kv2->k, &empty2);
1119: err2 = errno;
1120:
1121: if (empty2)
1122: kv2->hint->v.gh.notnum = true;
1123: else if (err2 == 0) {
1124: kv2->hint->v.gh.d = d2;
1125: kv2->hint->v.gh.nan = is_nan(d2);
1126: kv2->hint->status = HS_INITIALIZED;
1127: } else
1128: kv2->hint->status = HS_ERROR;
1129:
1130: key2_read = true;
1131: }
1132:
1133: if (kv1->hint->status == HS_INITIALIZED &&
1134: kv2->hint->status == HS_INITIALIZED) {
1135: if (kv1->hint->v.gh.notnum)
1136: return kv2->hint->v.gh.notnum ? 0 : -1;
1137: else if (kv2->hint->v.gh.notnum)
1138: return 1;
1139:
1140: if (kv1->hint->v.gh.nan)
1141: return kv2->hint->v.gh.nan ?
1142: cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
1143: else if (kv2->hint->v.gh.nan)
1144: return 1;
1145:
1146: d1 = kv1->hint->v.gh.d;
1147: d2 = kv2->hint->v.gh.d;
1148:
1149: if (d1 < d2)
1150: return -1;
1151: else if (d1 > d2)
1152: return 1;
1153: else
1154: return 0;
1155: }
1156:
1157: if (!key1_read) {
1158: errno = 0;
1159: d1 = bwstod(kv1->k, &empty1);
1160: err1 = errno;
1161: }
1162:
1163: if (!key2_read) {
1164: errno = 0;
1165: d2 = bwstod(kv2->k, &empty2);
1166: err2 = errno;
1167: }
1168:
1169: /* Non-value case: */
1170: if (empty1)
1171: return empty2 ? 0 : -1;
1172: else if (empty2)
1173: return 1;
1174:
1175: /* NAN case */
1176: if (is_nan(d1))
1177: return is_nan(d2) ? cmp_nans(d1, d2) : -1;
1178: else if (is_nan(d2))
1179: return 1;
1180:
1181: /* Infinities */
1182: if (err1 == ERANGE || err2 == ERANGE) {
1183: /* Minus infinity case */
1184: if (huge_minus(d1, err1)) {
1185: if (huge_minus(d2, err2)) {
1186: if (d1 == d2)
1187: return 0;
1188: return d1 < d2 ? -1 : 1;
1189: } else
1190: return -1;
1191:
1192: } else if (huge_minus(d2, err2)) {
1193: if (huge_minus(d1, err1)) {
1194: if (d1 == d2)
1195: return 0;
1196: return d1 < d2 ? -1 : 1;
1197: } else
1198: return 1;
1199: }
1200:
1201: /* Plus infinity case */
1202: if (huge_plus(d1, err1)) {
1203: if (huge_plus(d2, err2)) {
1204: if (d1 == d2)
1205: return 0;
1206: return d1 < d2 ? -1 : 1;
1207: } else
1208: return 1;
1209: } else if (huge_plus(d2, err2)) {
1210: if (huge_plus(d1, err1)) {
1211: if (d1 == d2)
1212: return 0;
1213: return d1 < d2 ? -1 : 1;
1214: } else
1215: return -1;
1216: }
1217: }
1218:
1219: if (d1 == d2)
1220: return 0;
1221: return d1 < d2 ? -1 : 1;
1222: }
1223:
1224: /*
1225: * Implements month sort (-M).
1226: */
1227: static int
1228: monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1229: {
1230: int val1, val2;
1231: bool key1_read, key2_read;
1232:
1233: val1 = val2 = 0;
1234: key1_read = key2_read = false;
1235:
1236: if (debug_sort) {
1237: bwsprintf(stdout, kv1->k, "; k1=<", ">");
1238: bwsprintf(stdout, kv2->k, "; k2=<", ">");
1239: }
1240:
1241: if (kv1->hint->status == HS_UNINITIALIZED) {
1242: kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1243: key1_read = true;
1244: kv1->hint->status = HS_INITIALIZED;
1245: }
1246:
1247: if (kv2->hint->status == HS_UNINITIALIZED) {
1248: kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1249: key2_read = true;
1250: kv2->hint->status = HS_INITIALIZED;
1251: }
1252:
1253: if (kv1->hint->status == HS_INITIALIZED) {
1254: val1 = kv1->hint->v.Mh.m;
1255: key1_read = true;
1256: }
1257:
1258: if (kv2->hint->status == HS_INITIALIZED) {
1259: val2 = kv2->hint->v.Mh.m;
1260: key2_read = true;
1261: }
1262:
1263: if (!key1_read)
1264: val1 = bws_month_score(kv1->k);
1265: if (!key2_read)
1266: val2 = bws_month_score(kv2->k);
1267:
1268: if (val1 == val2)
1269: return 0;
1270: return val1 < val2 ? -1 : 1;
1271: }