Annotation of src/usr.bin/sort/coll.c, Revision 1.3
1.3 ! miod 1: /* $OpenBSD: coll.c,v 1.2 2015/03/17 17:49:27 millert Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
30: #include <sys/types.h>
31:
32: #include <errno.h>
33: #include <err.h>
34: #include <langinfo.h>
35: #include <limits.h>
36: #include <math.h>
37: #include <md5.h>
38: #include <stdlib.h>
39: #include <string.h>
40: #include <wchar.h>
41: #include <wctype.h>
42:
43: #include "coll.h"
44: #include "vsort.h"
45:
46: struct key_specs *keys;
47: size_t keys_num = 0;
48:
49: wint_t symbol_decimal_point = L'.';
50: /* there is no default thousands separator in collate rules: */
51: wint_t symbol_thousands_sep = 0;
52: wint_t symbol_negative_sign = L'-';
53: wint_t symbol_positive_sign = L'+';
54:
55: static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
56: static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
57: static int monthcoll(struct key_value*, struct key_value *, size_t offset);
58: static int numcoll(struct key_value*, struct key_value *, size_t offset);
59: static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
60: static int randomcoll(struct key_value*, struct key_value *, size_t offset);
61: static int versioncoll(struct key_value*, struct key_value *, size_t offset);
62:
63: /*
64: * Allocate keys array
65: */
66: struct keys_array *
67: keys_array_alloc(void)
68: {
69: struct keys_array *ka;
70: size_t sz;
71:
72: sz = keys_array_size();
1.2 millert 73: ka = sort_calloc(1, sz);
1.1 millert 74:
75: return ka;
76: }
77:
78: /*
79: * Calculate whether we need key hint space
80: */
81: static size_t
82: key_hint_size(void)
83: {
84:
85: return need_hint ? sizeof(struct key_hint) : 0;
86: }
87:
88: /*
89: * Calculate keys array size
90: */
91: size_t
92: keys_array_size(void)
93: {
94:
95: return keys_num * (sizeof(struct key_value) + key_hint_size());
96: }
97:
98: /*
99: * Clean data of keys array
100: */
101: void
102: clean_keys_array(const struct bwstring *s, struct keys_array *ka)
103: {
104:
105: if (ka) {
106: size_t i;
107:
108: for (i = 0; i < keys_num; ++i)
109: if (ka->key[i].k && ka->key[i].k != s)
110: bwsfree(ka->key[i].k);
111: memset(ka, 0, keys_array_size());
112: }
113: }
114:
115: /*
116: * Set value of a key in the keys set
117: */
118: void
119: set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
120: {
121:
122: if (ka && keys_num > ind) {
123: struct key_value *kv;
124:
125: kv = &(ka->key[ind]);
126:
127: if (kv->k && kv->k != s)
128: bwsfree(kv->k);
129: kv->k = s;
130: }
131: }
132:
133: /*
134: * Initialize a sort list item
135: */
136: struct sort_list_item *
137: sort_list_item_alloc(void)
138: {
139: struct sort_list_item *si;
140: size_t sz;
141:
142: sz = sizeof(struct sort_list_item) + keys_array_size();
1.2 millert 143: si = sort_calloc(1, sz);
1.1 millert 144:
145: return si;
146: }
147:
148: size_t
149: sort_list_item_size(struct sort_list_item *si)
150: {
151: size_t i, ret = 0;
152:
153: if (si) {
154: ret = sizeof(struct sort_list_item) + keys_array_size();
155: if (si->str)
156: ret += bws_memsize(si->str);
157: for (i = 0; i < keys_num; ++i) {
158: struct key_value *kv;
159:
160: kv = &(si->ka.key[i]);
161:
162: if (kv->k != si->str)
163: ret += bws_memsize(kv->k);
164: }
165: }
166: return ret;
167: }
168:
169: /*
170: * Calculate key for a sort list item
171: */
172: static void
173: sort_list_item_make_key(struct sort_list_item *si)
174: {
175:
176: preproc(si->str, &(si->ka));
177: }
178:
179: /*
180: * Set value of a sort list item.
181: * Return combined string and keys memory size.
182: */
183: void
184: sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
185: {
186:
187: if (si) {
188: clean_keys_array(si->str, &(si->ka));
189: if (si->str) {
190: if (si->str == str) {
191: /* we are trying to reset the same string */
192: return;
193: } else {
194: bwsfree(si->str);
195: si->str = NULL;
196: }
197: }
198: si->str = str;
199: sort_list_item_make_key(si);
200: }
201: }
202:
203: /*
204: * De-allocate a sort list item object memory
205: */
206: void
207: sort_list_item_clean(struct sort_list_item *si)
208: {
209:
210: if (si) {
211: clean_keys_array(si->str, &(si->ka));
212: if (si->str) {
213: bwsfree(si->str);
214: si->str = NULL;
215: }
216: }
217: }
218:
219: /*
220: * Skip columns according to specs
221: */
222: static size_t
223: skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
224: bool skip_blanks, bool *empty_key)
225: {
226: if (cols < 1)
227: return BWSLEN(s) + 1;
228:
229: if (skip_blanks)
230: while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
231: ++start;
232:
233: while (start < BWSLEN(s) && cols > 1) {
234: --cols;
235: ++start;
236: }
237:
238: if (start >= BWSLEN(s))
239: *empty_key = true;
240:
241: return start;
242: }
243:
244: /*
245: * Skip fields according to specs
246: */
247: static size_t
248: skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
249: {
250:
251: if (fields < 2) {
252: if (BWSLEN(s) == 0)
253: *empty_field = true;
254: return 0;
255: } else if (!(sort_opts_vals.tflag)) {
256: size_t cpos = 0;
257: bool pb = true;
258:
259: while (cpos < BWSLEN(s)) {
260: bool isblank;
261:
262: isblank = iswblank(BWS_GET(s, cpos));
263:
264: if (isblank && !pb) {
265: --fields;
266: if (fields <= 1)
267: return cpos;
268: }
269: pb = isblank;
270: ++cpos;
271: }
272: if (fields > 1)
273: *empty_field = true;
274: return cpos;
275: } else {
276: size_t cpos = 0;
277:
278: while (cpos < BWSLEN(s)) {
279: if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
280: --fields;
281: if (fields <= 1)
282: return cpos + 1;
283: }
284: ++cpos;
285: }
286: if (fields > 1)
287: *empty_field = true;
288: return cpos;
289: }
290: }
291:
292: /*
293: * Find fields start
294: */
295: static void
296: find_field_start(const struct bwstring *s, struct key_specs *ks,
297: size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
298: {
299:
300: *field_start = skip_fields_to_start(s, ks->f1, empty_field);
301: if (!*empty_field)
302: *key_start = skip_cols_to_start(s, ks->c1, *field_start,
303: ks->pos1b, empty_key);
304: else
305: *empty_key = true;
306: }
307:
308: /*
309: * Find end key position
310: */
311: static size_t
312: find_field_end(const struct bwstring *s, struct key_specs *ks)
313: {
314: size_t f2, next_field_start, pos_end;
315: bool empty_field, empty_key;
316:
317: empty_field = false;
318: empty_key = false;
319: f2 = ks->f2;
320:
321: if (f2 == 0)
322: return BWSLEN(s) + 1;
323: else {
324: if (ks->c2 == 0) {
325: next_field_start = skip_fields_to_start(s, f2 + 1,
326: &empty_field);
327: if ((next_field_start > 0) && sort_opts_vals.tflag &&
328: ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
329: next_field_start - 1)))
330: --next_field_start;
331: } else
332: next_field_start = skip_fields_to_start(s, f2,
333: &empty_field);
334: }
335:
336: if (empty_field || (next_field_start >= BWSLEN(s)))
337: return BWSLEN(s) + 1;
338:
339: if (ks->c2) {
340: pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
341: ks->pos2b, &empty_key);
342: if (pos_end < BWSLEN(s))
343: ++pos_end;
344: } else
345: pos_end = next_field_start;
346:
347: return pos_end;
348: }
349:
350: /*
351: * Cut a field according to the key specs
352: */
353: static struct bwstring *
354: cut_field(const struct bwstring *s, struct key_specs *ks)
355: {
356: struct bwstring *ret = NULL;
357:
358: if (s && ks) {
359: size_t field_start, key_end, key_start, sz;
360: bool empty_field, empty_key;
361:
362: field_start = 0;
363: key_start = 0;
364: empty_field = false;
365: empty_key = false;
366:
367: find_field_start(s, ks, &field_start, &key_start,
368: &empty_field, &empty_key);
369:
370: if (empty_key)
371: sz = 0;
372: else {
373: key_end = find_field_end(s, ks);
374: sz = (key_end < key_start) ? 0 : (key_end - key_start);
375: }
376:
377: ret = bwsalloc(sz);
378: if (sz)
379: bwsnocpy(ret, s, key_start, sz);
380: } else
381: ret = bwsalloc(0);
382:
383: return ret;
384: }
385:
386: /*
387: * Preprocesses a line applying the necessary transformations
388: * specified by command line options and returns the preprocessed
389: * string, which can be used to compare.
390: */
391: int
392: preproc(struct bwstring *s, struct keys_array *ka)
393: {
394:
395: if (sort_opts_vals.kflag) {
396: size_t i;
397: for (i = 0; i < keys_num; i++) {
398: struct bwstring *key;
399: struct key_specs *kspecs;
400: struct sort_mods *sm;
401:
402: kspecs = &(keys[i]);
403: key = cut_field(s, kspecs);
404:
405: sm = &(kspecs->sm);
406: if (sm->dflag)
407: key = dictionary_order(key);
408: else if (sm->iflag)
409: key = ignore_nonprinting(key);
410: if (sm->fflag || sm->Mflag)
411: key = ignore_case(key);
412:
413: set_key_on_keys_array(ka, key, i);
414: }
415: } else {
416: struct bwstring *ret = NULL;
417: struct sort_mods *sm = default_sort_mods;
418:
419: if (sm->bflag) {
420: if (ret == NULL)
421: ret = bwsdup(s);
422: ret = ignore_leading_blanks(ret);
423: }
424: if (sm->dflag) {
425: if (ret == NULL)
426: ret = bwsdup(s);
427: ret = dictionary_order(ret);
428: } else if (sm->iflag) {
429: if (ret == NULL)
430: ret = bwsdup(s);
431: ret = ignore_nonprinting(ret);
432: }
433: if (sm->fflag || sm->Mflag) {
434: if (ret == NULL)
435: ret = bwsdup(s);
436: ret = ignore_case(ret);
437: }
438: if (ret == NULL)
439: set_key_on_keys_array(ka, s, 0);
440: else
441: set_key_on_keys_array(ka, ret, 0);
442: }
443:
444: return 0;
445: }
446:
447: cmpcoll_t
448: get_sort_func(struct sort_mods *sm)
449: {
450:
451: if (sm->nflag)
452: return numcoll;
453: else if (sm->hflag)
454: return hnumcoll;
455: else if (sm->gflag)
456: return gnumcoll;
457: else if (sm->Mflag)
458: return monthcoll;
459: else if (sm->Rflag)
460: return randomcoll;
461: else if (sm->Vflag)
462: return versioncoll;
463: else
464: return wstrcoll;
465: }
466:
467: /*
468: * Compares the given strings. Returns a positive number if
469: * the first precedes the second, a negative number if the second is
470: * the preceding one, and zero if they are equal. This function calls
471: * the underlying collate functions, which done the actual comparison.
472: */
473: int
474: key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
475: {
476: struct sort_mods *sm;
477: int res = 0;
478: size_t i;
479:
480: for (i = 0; i < keys_num; ++i) {
481: sm = &(keys[i].sm);
482:
483: if (sm->rflag)
484: res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
485: else
486: res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
487:
488: if (res)
489: break;
490:
491: /* offset applies to only the first key */
492: offset = 0;
493: }
494: return res;
495: }
496:
497: /*
498: * Compare two strings.
499: * Plain symbol-by-symbol comparison.
500: */
501: int
502: top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
503: {
504:
505: if (default_sort_mods->rflag) {
506: const struct bwstring *tmp;
507:
508: tmp = s1;
509: s1 = s2;
510: s2 = tmp;
511: }
512:
513: return bwscoll(s1, s2, 0);
514: }
515:
516: /*
517: * Compare a string and a sort list item, according to the sort specs.
518: */
519: int
520: str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
521: {
522: struct keys_array *ka1;
523: int ret = 0;
524:
525: ka1 = keys_array_alloc();
526:
527: preproc(str1, ka1);
528:
529: sort_list_item_make_key(*ss2);
530:
531: if (debug_sort) {
532: bwsprintf(stdout, str1, "; s1=<", ">");
533: bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
534: }
535:
536: ret = key_coll(ka1, &((*ss2)->ka), 0);
537:
538: if (debug_sort)
539: printf("; cmp1=%d", ret);
540:
541: clean_keys_array(str1, ka1);
542: sort_free(ka1);
543:
544: if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
545: ret = top_level_str_coll(str1, ((*ss2)->str));
546: if (debug_sort)
547: printf("; cmp2=%d", ret);
548: }
549:
550: if (debug_sort)
551: putchar('\n');
552:
553: return ret;
554: }
555:
556: /*
557: * Compare two sort list items, according to the sort specs.
558: */
559: int
560: list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
561: size_t offset)
562: {
563: int ret;
564:
565: ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
566:
567: if (debug_sort) {
568: if (offset)
569: printf("; offset=%d", (int) offset);
570: bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
571: bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
572: printf("; cmp1=%d\n", ret);
573: }
574:
575: if (ret)
576: return ret;
577:
578: if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
579: ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
580: if (debug_sort)
581: printf("; cmp2=%d\n", ret);
582: }
583:
584: return ret;
585: }
586:
587: /*
588: * Compare two sort list items, according to the sort specs.
589: */
590: int
591: list_coll(const void *ss1, const void *ss2)
592: {
593:
594: return list_coll_offset((struct sort_list_item **)ss1,
595: (struct sort_list_item **)ss2, 0);
596: }
597:
598: #define LSCDEF(N) \
599: static int \
600: list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
601: { \
602: \
603: return list_coll_offset(ss1, ss2, N); \
604: }
605:
606: LSCDEF(0)
607: LSCDEF(1)
608: LSCDEF(2)
609: LSCDEF(3)
610: LSCDEF(4)
611: LSCDEF(5)
612: LSCDEF(6)
613: LSCDEF(7)
614: LSCDEF(8)
615: LSCDEF(9)
616: LSCDEF(10)
617: LSCDEF(11)
618: LSCDEF(12)
619: LSCDEF(13)
620: LSCDEF(14)
621: LSCDEF(15)
622: LSCDEF(16)
623: LSCDEF(17)
624: LSCDEF(18)
625: LSCDEF(19)
626: LSCDEF(20)
627:
628: listcoll_t
629: get_list_call_func(size_t offset)
630: {
631: static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
632: list_coll_2, list_coll_3, list_coll_4, list_coll_5,
633: list_coll_6, list_coll_7, list_coll_8, list_coll_9,
634: list_coll_10, list_coll_11, list_coll_12, list_coll_13,
635: list_coll_14, list_coll_15, list_coll_16, list_coll_17,
636: list_coll_18, list_coll_19, list_coll_20 };
637:
638: if (offset <= 20)
639: return lsarray[offset];
640:
641: return list_coll_0;
642: }
643:
644: /*
645: * Compare two sort list items, only by their original string.
646: */
647: int
648: list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
649: {
650:
651: return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
652: }
653:
654: /*
655: * Maximum size of a number in the string (before or after decimal point)
656: */
657: #define MAX_NUM_SIZE (128)
658:
659: /*
660: * Set suffix value
661: */
662: static void
663: setsuffix(wchar_t c, unsigned char *si)
664: {
665: switch (c){
666: case L'k':
667: case L'K':
668: *si = 1;
669: break;
670: case L'M':
671: *si = 2;
672: break;
673: case L'G':
674: *si = 3;
675: break;
676: case L'T':
677: *si = 4;
678: break;
679: case L'P':
680: *si = 5;
681: break;
682: case L'E':
683: *si = 6;
684: break;
685: case L'Z':
686: *si = 7;
687: break;
688: case L'Y':
689: *si = 8;
690: break;
691: default:
692: *si = 0;
693: };
694: }
695:
696: /*
697: * Read string s and parse the string into a fixed-decimal-point number.
698: * sign equals -1 if the number is negative (explicit plus is not allowed,
699: * according to GNU sort's "info sort".
700: * The number part before decimal point is in the smain, after the decimal
701: * point is in sfrac, tail is the pointer to the remainder of the string.
702: */
703: static int
704: read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
705: {
706: bwstring_iterator s;
707:
708: s = bws_begin(s0);
709:
710: /* always end the fraction with zero, even if we have no fraction */
711: sfrac[0] = 0;
712:
713: while (iswblank(bws_get_iter_value(s)))
714: s = bws_iterator_inc(s, 1);
715:
716: if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
717: *sign = -1;
718: s = bws_iterator_inc(s, 1);
719: }
720:
721: // This is '0', not '\0', do not change this
722: while (iswdigit(bws_get_iter_value(s)) &&
723: (bws_get_iter_value(s) == L'0'))
724: s = bws_iterator_inc(s, 1);
725:
726: while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
727: if (iswdigit(bws_get_iter_value(s))) {
728: smain[*main_len] = bws_get_iter_value(s);
729: s = bws_iterator_inc(s, 1);
730: *main_len += 1;
731: } else if (symbol_thousands_sep &&
732: (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
733: s = bws_iterator_inc(s, 1);
734: else
735: break;
736: }
737:
738: smain[*main_len] = 0;
739:
740: if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
741: s = bws_iterator_inc(s, 1);
742: while (iswdigit(bws_get_iter_value(s)) &&
743: *frac_len < MAX_NUM_SIZE) {
744: sfrac[*frac_len] = bws_get_iter_value(s);
745: s = bws_iterator_inc(s, 1);
746: *frac_len += 1;
747: }
748: sfrac[*frac_len] = 0;
749:
750: while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
751: --(*frac_len);
752: sfrac[*frac_len] = L'\0';
753: }
754: }
755:
756: setsuffix(bws_get_iter_value(s), si);
757:
758: if ((*main_len + *frac_len) == 0)
759: *sign = 0;
760:
761: return 0;
762: }
763:
764: /*
765: * Implements string sort.
766: */
767: static int
768: wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
769: {
770:
771: if (debug_sort) {
772: if (offset)
773: printf("; offset=%d\n", (int) offset);
774: bwsprintf(stdout, kv1->k, "; k1=<", ">");
775: printf("(%zu)", BWSLEN(kv1->k));
776: bwsprintf(stdout, kv2->k, ", k2=<", ">");
777: printf("(%zu)", BWSLEN(kv2->k));
778: }
779:
780: return bwscoll(kv1->k, kv2->k, offset);
781: }
782:
783: /*
784: * Compare two suffixes
785: */
786: static inline int
787: cmpsuffix(unsigned char si1, unsigned char si2)
788: {
789:
790: return (char)si1 - (char)si2;
791: }
792:
793: /*
794: * Implements numeric sort for -n and -h.
795: */
796: static int
797: numcoll_impl(struct key_value *kv1, struct key_value *kv2,
798: size_t offset __unused, bool use_suffix)
799: {
800: struct bwstring *s1, *s2;
801: wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
802: wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
803: int cmp_res, sign1, sign2;
804: size_t frac1, frac2, main1, main2;
805: unsigned char SI1, SI2;
806: bool e1, e2, key1_read, key2_read;
807:
808: s1 = kv1->k;
809: s2 = kv2->k;
810: sign1 = sign2 = 0;
811: main1 = main2 = 0;
812: frac1 = frac2 = 0;
813:
814: key1_read = key2_read = false;
815:
816: if (debug_sort) {
817: bwsprintf(stdout, s1, "; k1=<", ">");
818: bwsprintf(stdout, s2, ", k2=<", ">");
819: }
820:
821: if (s1 == s2)
822: return 0;
823:
824: if (kv1->hint->status == HS_UNINITIALIZED) {
825: /* read the number from the string */
826: read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
827: key1_read = true;
828: kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
829: if (main1 < 1 && frac1 < 1)
830: kv1->hint->v.nh.empty=true;
831: kv1->hint->v.nh.si = SI1;
832: kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
833: HS_INITIALIZED : HS_ERROR;
834: kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
835: }
836:
837: if (kv2->hint->status == HS_UNINITIALIZED) {
838: /* read the number from the string */
839: read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
840: key2_read = true;
841: kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
842: if (main2 < 1 && frac2 < 1)
843: kv2->hint->v.nh.empty=true;
844: kv2->hint->v.nh.si = SI2;
845: kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
846: HS_INITIALIZED : HS_ERROR;
847: kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
848: }
849:
850: if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
851: HS_INITIALIZED) {
852: unsigned long long n1, n2;
853: bool neg1, neg2;
854:
855: e1 = kv1->hint->v.nh.empty;
856: e2 = kv2->hint->v.nh.empty;
857:
858: if (e1 && e2)
859: return 0;
860:
861: neg1 = kv1->hint->v.nh.neg;
862: neg2 = kv2->hint->v.nh.neg;
863:
864: if (neg1 && !neg2)
865: return -1;
866: if (neg2 && !neg1)
867: return 1;
868:
869: if (e1)
870: return neg2 ? 1 : -1;
871: else if (e2)
872: return neg1 ? -1 : 1;
873:
874:
875: if (use_suffix) {
876: cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
877: if (cmp_res)
878: return neg1 ? -cmp_res : cmp_res;
879: }
880:
881: n1 = kv1->hint->v.nh.n1;
882: n2 = kv2->hint->v.nh.n1;
883: if (n1 < n2)
884: return neg1 ? 1 : -1;
885: else if (n1 > n2)
886: return neg1 ? -1 : 1;
887: }
888:
889: /* read the numbers from the strings */
890: if (!key1_read)
891: read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
892: if (!key2_read)
893: read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
894:
895: e1 = ((main1 + frac1) == 0);
896: e2 = ((main2 + frac2) == 0);
897:
898: if (e1 && e2)
899: return 0;
900:
901: /* we know the result if the signs are different */
902: if (sign1 < 0 && sign2 >= 0)
903: return -1;
904: if (sign1 >= 0 && sign2 < 0)
905: return 1;
906:
907: if (e1)
908: return (sign2 < 0) ? +1 : -1;
909: else if (e2)
910: return (sign1 < 0) ? -1 : 1;
911:
912: if (use_suffix) {
913: cmp_res = cmpsuffix(SI1, SI2);
914: if (cmp_res)
915: return (sign1 < 0) ? -cmp_res : cmp_res;
916: }
917:
918: /* if both numbers are empty assume that the strings are equal */
919: if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
920: return 0;
921:
922: /*
923: * if the main part is of different size, we know the result
924: * (because the leading zeros are removed)
925: */
926: if (main1 < main2)
927: cmp_res = -1;
928: else if (main1 > main2)
929: cmp_res = +1;
930: /* if the sizes are equal then simple non-collate string compare gives the correct result */
931: else
932: cmp_res = wcscmp(smain1, smain2);
933:
934: /* check fraction */
935: if (!cmp_res)
936: cmp_res = wcscmp(sfrac1, sfrac2);
937:
938: if (!cmp_res)
939: return 0;
940:
941: /* reverse result if the signs are negative */
942: if (sign1 < 0 && sign2 < 0)
943: cmp_res = -cmp_res;
944:
945: return cmp_res;
946: }
947:
948: /*
949: * Implements numeric sort (-n).
950: */
951: static int
952: numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
953: {
954:
955: return numcoll_impl(kv1, kv2, offset, false);
956: }
957:
958: /*
959: * Implements 'human' numeric sort (-h).
960: */
961: static int
962: hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
963: {
964:
965: return numcoll_impl(kv1, kv2, offset, true);
966: }
967:
968: /*
969: * Implements random sort (-R).
970: */
971: static int
972: randomcoll(struct key_value *kv1, struct key_value *kv2,
973: size_t offset __unused)
974: {
975: struct bwstring *s1, *s2;
976: MD5_CTX ctx1, ctx2;
977: char *b1, *b2;
978:
979: s1 = kv1->k;
980: s2 = kv2->k;
981:
982: if (debug_sort) {
983: bwsprintf(stdout, s1, "; k1=<", ">");
984: bwsprintf(stdout, s2, ", k2=<", ">");
985: }
986:
987: if (s1 == s2)
988: return 0;
989:
990: memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
991: memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
992:
993: MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
994: MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
995: b1 = MD5End(&ctx1, NULL);
996: b2 = MD5End(&ctx2, NULL);
997: if (b1 == NULL) {
998: if (b2 == NULL)
999: return 0;
1000: else {
1001: sort_free(b2);
1002: return -1;
1003: }
1004: } else if (b2 == NULL) {
1005: sort_free(b1);
1006: return 1;
1007: } else {
1008: int cmp_res;
1009:
1010: cmp_res = strcmp(b1, b2);
1011: sort_free(b1);
1012: sort_free(b2);
1013:
1014: if (!cmp_res)
1015: cmp_res = bwscoll(s1, s2, 0);
1016:
1017: return cmp_res;
1018: }
1019: }
1020:
1021: /*
1022: * Implements version sort (-V).
1023: */
1024: static int
1025: versioncoll(struct key_value *kv1, struct key_value *kv2,
1026: size_t offset __unused)
1027: {
1028: struct bwstring *s1, *s2;
1029:
1030: s1 = kv1->k;
1031: s2 = kv2->k;
1032:
1033: if (debug_sort) {
1034: bwsprintf(stdout, s1, "; k1=<", ">");
1035: bwsprintf(stdout, s2, ", k2=<", ">");
1036: }
1037:
1038: if (s1 == s2)
1039: return 0;
1040:
1041: return vcmp(s1, s2);
1042: }
1043:
1044: /*
1045: * Check for minus infinity
1046: */
1047: static inline bool
1048: huge_minus(double d, int err1)
1049: {
1050:
1051: if (err1 == ERANGE)
1052: if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1053: return 1;
1054:
1055: return 0;
1056: }
1057:
1058: /*
1059: * Check for plus infinity
1060: */
1061: static inline bool
1062: huge_plus(double d, int err1)
1063: {
1064:
1065: if (err1 == ERANGE)
1066: if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1067: return 1;
1068:
1069: return 0;
1070: }
1071:
1072: /*
1073: * Check whether a function is a NAN
1074: */
1075: static bool
1076: is_nan(double d)
1077: {
1.3 ! miod 1078: #if defined(NAN)
1.1 millert 1079: return (d == NAN || isnan(d));
1.3 ! miod 1080: #else
! 1081: return (isnan(d));
! 1082: #endif
1.1 millert 1083: }
1084:
1085: /*
1086: * Compare two NANs
1087: */
1088: static int
1089: cmp_nans(double d1, double d2)
1090: {
1091:
1092: if (d1 == d2)
1093: return 0;
1094: return d1 < d2 ? -1 : 1;
1095: }
1096:
1097: /*
1098: * Implements general numeric sort (-g).
1099: */
1100: static int
1101: gnumcoll(struct key_value *kv1, struct key_value *kv2,
1102: size_t offset __unused)
1103: {
1104: double d1, d2;
1105: int err1, err2;
1106: bool empty1, empty2, key1_read, key2_read;
1107:
1108: d1 = d2 = 0;
1109: err1 = err2 = 0;
1110: key1_read = key2_read = false;
1111:
1112: if (debug_sort) {
1113: bwsprintf(stdout, kv1->k, "; k1=<", ">");
1114: bwsprintf(stdout, kv2->k, "; k2=<", ">");
1115: }
1116:
1117: if (kv1->hint->status == HS_UNINITIALIZED) {
1118: errno = 0;
1119: d1 = bwstod(kv1->k, &empty1);
1120: err1 = errno;
1121:
1122: if (empty1)
1123: kv1->hint->v.gh.notnum = true;
1124: else if (err1 == 0) {
1125: kv1->hint->v.gh.d = d1;
1126: kv1->hint->v.gh.nan = is_nan(d1);
1127: kv1->hint->status = HS_INITIALIZED;
1128: } else
1129: kv1->hint->status = HS_ERROR;
1130:
1131: key1_read = true;
1132: }
1133:
1134: if (kv2->hint->status == HS_UNINITIALIZED) {
1135: errno = 0;
1136: d2 = bwstod(kv2->k, &empty2);
1137: err2 = errno;
1138:
1139: if (empty2)
1140: kv2->hint->v.gh.notnum = true;
1141: else if (err2 == 0) {
1142: kv2->hint->v.gh.d = d2;
1143: kv2->hint->v.gh.nan = is_nan(d2);
1144: kv2->hint->status = HS_INITIALIZED;
1145: } else
1146: kv2->hint->status = HS_ERROR;
1147:
1148: key2_read = true;
1149: }
1150:
1151: if (kv1->hint->status == HS_INITIALIZED &&
1152: kv2->hint->status == HS_INITIALIZED) {
1153: if (kv1->hint->v.gh.notnum)
1154: return kv2->hint->v.gh.notnum ? 0 : -1;
1155: else if (kv2->hint->v.gh.notnum)
1156: return 1;
1157:
1158: if (kv1->hint->v.gh.nan)
1159: return kv2->hint->v.gh.nan ?
1160: cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
1161: else if (kv2->hint->v.gh.nan)
1162: return 1;
1163:
1164: d1 = kv1->hint->v.gh.d;
1165: d2 = kv2->hint->v.gh.d;
1166:
1167: if (d1 < d2)
1168: return -1;
1169: else if (d1 > d2)
1170: return 1;
1171: else
1172: return 0;
1173: }
1174:
1175: if (!key1_read) {
1176: errno = 0;
1177: d1 = bwstod(kv1->k, &empty1);
1178: err1 = errno;
1179: }
1180:
1181: if (!key2_read) {
1182: errno = 0;
1183: d2 = bwstod(kv2->k, &empty2);
1184: err2 = errno;
1185: }
1186:
1187: /* Non-value case: */
1188: if (empty1)
1189: return empty2 ? 0 : -1;
1190: else if (empty2)
1191: return 1;
1192:
1193: /* NAN case */
1194: if (is_nan(d1))
1195: return is_nan(d2) ? cmp_nans(d1, d2) : -1;
1196: else if (is_nan(d2))
1197: return 1;
1198:
1199: /* Infinities */
1200: if (err1 == ERANGE || err2 == ERANGE) {
1201: /* Minus infinity case */
1202: if (huge_minus(d1, err1)) {
1203: if (huge_minus(d2, err2)) {
1204: if (d1 == d2)
1205: return 0;
1206: return d1 < d2 ? -1 : 1;
1207: } else
1208: return -1;
1209:
1210: } else if (huge_minus(d2, err2)) {
1211: if (huge_minus(d1, err1)) {
1212: if (d1 == d2)
1213: return 0;
1214: return d1 < d2 ? -1 : 1;
1215: } else
1216: return 1;
1217: }
1218:
1219: /* Plus infinity case */
1220: if (huge_plus(d1, err1)) {
1221: if (huge_plus(d2, err2)) {
1222: if (d1 == d2)
1223: return 0;
1224: return d1 < d2 ? -1 : 1;
1225: } else
1226: return 1;
1227: } else if (huge_plus(d2, err2)) {
1228: if (huge_plus(d1, err1)) {
1229: if (d1 == d2)
1230: return 0;
1231: return d1 < d2 ? -1 : 1;
1232: } else
1233: return -1;
1234: }
1235: }
1236:
1237: if (d1 == d2)
1238: return 0;
1239: return d1 < d2 ? -1 : 1;
1240: }
1241:
1242: /*
1243: * Implements month sort (-M).
1244: */
1245: static int
1246: monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1247: {
1248: int val1, val2;
1249: bool key1_read, key2_read;
1250:
1251: val1 = val2 = 0;
1252: key1_read = key2_read = false;
1253:
1254: if (debug_sort) {
1255: bwsprintf(stdout, kv1->k, "; k1=<", ">");
1256: bwsprintf(stdout, kv2->k, "; k2=<", ">");
1257: }
1258:
1259: if (kv1->hint->status == HS_UNINITIALIZED) {
1260: kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1261: key1_read = true;
1262: kv1->hint->status = HS_INITIALIZED;
1263: }
1264:
1265: if (kv2->hint->status == HS_UNINITIALIZED) {
1266: kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1267: key2_read = true;
1268: kv2->hint->status = HS_INITIALIZED;
1269: }
1270:
1271: if (kv1->hint->status == HS_INITIALIZED) {
1272: val1 = kv1->hint->v.Mh.m;
1273: key1_read = true;
1274: }
1275:
1276: if (kv2->hint->status == HS_INITIALIZED) {
1277: val2 = kv2->hint->v.Mh.m;
1278: key2_read = true;
1279: }
1280:
1281: if (!key1_read)
1282: val1 = bws_month_score(kv1->k);
1283: if (!key2_read)
1284: val2 = bws_month_score(kv2->k);
1285:
1286: if (val1 == val2)
1287: return 0;
1288: return val1 < val2 ? -1 : 1;
1289: }