Annotation of src/usr.bin/sort/coll.c, Revision 1.12
1.12 ! schwarze 1: /* $OpenBSD: coll.c,v 1.11 2015/12/11 21:41:51 mmcc Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
30: #include <sys/types.h>
31:
32: #include <errno.h>
33: #include <err.h>
34: #include <langinfo.h>
35: #include <limits.h>
36: #include <math.h>
37: #include <md5.h>
38: #include <stdlib.h>
39: #include <string.h>
40: #include <wchar.h>
41: #include <wctype.h>
42:
43: #include "coll.h"
44: #include "vsort.h"
45:
46: struct key_specs *keys;
47: size_t keys_num = 0;
48:
49: static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
50: static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
51: static int monthcoll(struct key_value*, struct key_value *, size_t offset);
52: static int numcoll(struct key_value*, struct key_value *, size_t offset);
53: static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
54: static int randomcoll(struct key_value*, struct key_value *, size_t offset);
55: static int versioncoll(struct key_value*, struct key_value *, size_t offset);
56:
57: /*
58: * Allocate keys array
59: */
60: struct keys_array *
61: keys_array_alloc(void)
62: {
63: struct keys_array *ka;
64: size_t sz;
65:
66: sz = keys_array_size();
1.2 millert 67: ka = sort_calloc(1, sz);
1.1 millert 68:
69: return ka;
70: }
71:
72: /*
1.8 deraadt 73: * Calculate whether we need key hint space
1.1 millert 74: */
75: static size_t
76: key_hint_size(void)
77: {
78: return need_hint ? sizeof(struct key_hint) : 0;
79: }
80:
81: /*
82: * Calculate keys array size
83: */
84: size_t
85: keys_array_size(void)
86: {
87: return keys_num * (sizeof(struct key_value) + key_hint_size());
88: }
89:
90: /*
91: * Clean data of keys array
92: */
93: void
94: clean_keys_array(const struct bwstring *s, struct keys_array *ka)
95: {
96: if (ka) {
97: size_t i;
98:
99: for (i = 0; i < keys_num; ++i)
100: if (ka->key[i].k && ka->key[i].k != s)
101: bwsfree(ka->key[i].k);
102: memset(ka, 0, keys_array_size());
103: }
104: }
105:
106: /*
107: * Set value of a key in the keys set
108: */
109: void
110: set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
111: {
112: if (ka && keys_num > ind) {
113: struct key_value *kv;
114:
115: kv = &(ka->key[ind]);
116:
1.7 tobias 117: if (kv->k != s)
1.1 millert 118: bwsfree(kv->k);
119: kv->k = s;
120: }
121: }
122:
123: /*
124: * Initialize a sort list item
125: */
126: struct sort_list_item *
127: sort_list_item_alloc(void)
128: {
129: struct sort_list_item *si;
130: size_t sz;
131:
132: sz = sizeof(struct sort_list_item) + keys_array_size();
1.2 millert 133: si = sort_calloc(1, sz);
1.1 millert 134:
135: return si;
136: }
137:
138: size_t
139: sort_list_item_size(struct sort_list_item *si)
140: {
141: size_t i, ret = 0;
142:
143: if (si) {
144: ret = sizeof(struct sort_list_item) + keys_array_size();
145: if (si->str)
146: ret += bws_memsize(si->str);
147: for (i = 0; i < keys_num; ++i) {
148: struct key_value *kv;
149:
150: kv = &(si->ka.key[i]);
151:
152: if (kv->k != si->str)
153: ret += bws_memsize(kv->k);
154: }
155: }
156: return ret;
157: }
158:
159: /*
160: * Calculate key for a sort list item
161: */
162: static void
163: sort_list_item_make_key(struct sort_list_item *si)
164: {
165: preproc(si->str, &(si->ka));
166: }
167:
168: /*
169: * Set value of a sort list item.
170: * Return combined string and keys memory size.
171: */
172: void
173: sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
174: {
175: if (si) {
176: clean_keys_array(si->str, &(si->ka));
177: if (si->str) {
178: if (si->str == str) {
179: /* we are trying to reset the same string */
180: return;
181: } else {
182: bwsfree(si->str);
183: si->str = NULL;
184: }
185: }
186: si->str = str;
187: sort_list_item_make_key(si);
188: }
189: }
190:
191: /*
192: * De-allocate a sort list item object memory
193: */
194: void
195: sort_list_item_clean(struct sort_list_item *si)
196: {
197: if (si) {
198: clean_keys_array(si->str, &(si->ka));
199: if (si->str) {
200: bwsfree(si->str);
201: si->str = NULL;
202: }
203: }
204: }
205:
206: /*
207: * Skip columns according to specs
208: */
209: static size_t
210: skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
211: bool skip_blanks, bool *empty_key)
212: {
213: if (cols < 1)
214: return BWSLEN(s) + 1;
215:
216: if (skip_blanks)
217: while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
218: ++start;
219:
220: while (start < BWSLEN(s) && cols > 1) {
221: --cols;
222: ++start;
223: }
224:
225: if (start >= BWSLEN(s))
226: *empty_key = true;
227:
228: return start;
229: }
230:
231: /*
232: * Skip fields according to specs
233: */
234: static size_t
235: skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
236: {
237: if (fields < 2) {
238: if (BWSLEN(s) == 0)
239: *empty_field = true;
240: return 0;
241: } else if (!(sort_opts_vals.tflag)) {
242: size_t cpos = 0;
243: bool pb = true;
244:
245: while (cpos < BWSLEN(s)) {
246: bool isblank;
247:
248: isblank = iswblank(BWS_GET(s, cpos));
249:
250: if (isblank && !pb) {
251: --fields;
252: if (fields <= 1)
253: return cpos;
254: }
255: pb = isblank;
256: ++cpos;
257: }
258: if (fields > 1)
259: *empty_field = true;
260: return cpos;
261: } else {
262: size_t cpos = 0;
263:
264: while (cpos < BWSLEN(s)) {
265: if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
266: --fields;
267: if (fields <= 1)
268: return cpos + 1;
269: }
270: ++cpos;
271: }
272: if (fields > 1)
273: *empty_field = true;
274: return cpos;
275: }
276: }
277:
278: /*
279: * Find fields start
280: */
281: static void
282: find_field_start(const struct bwstring *s, struct key_specs *ks,
283: size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
284: {
285: *field_start = skip_fields_to_start(s, ks->f1, empty_field);
286: if (!*empty_field)
287: *key_start = skip_cols_to_start(s, ks->c1, *field_start,
288: ks->pos1b, empty_key);
289: else
290: *empty_key = true;
291: }
292:
293: /*
294: * Find end key position
295: */
296: static size_t
297: find_field_end(const struct bwstring *s, struct key_specs *ks)
298: {
299: size_t f2, next_field_start, pos_end;
300: bool empty_field, empty_key;
301:
302: empty_field = false;
303: empty_key = false;
304: f2 = ks->f2;
305:
306: if (f2 == 0)
307: return BWSLEN(s) + 1;
308: else {
309: if (ks->c2 == 0) {
310: next_field_start = skip_fields_to_start(s, f2 + 1,
311: &empty_field);
312: if ((next_field_start > 0) && sort_opts_vals.tflag &&
313: ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
314: next_field_start - 1)))
315: --next_field_start;
316: } else
317: next_field_start = skip_fields_to_start(s, f2,
318: &empty_field);
319: }
320:
321: if (empty_field || (next_field_start >= BWSLEN(s)))
322: return BWSLEN(s) + 1;
323:
324: if (ks->c2) {
325: pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
326: ks->pos2b, &empty_key);
327: if (pos_end < BWSLEN(s))
328: ++pos_end;
329: } else
330: pos_end = next_field_start;
331:
332: return pos_end;
333: }
334:
335: /*
336: * Cut a field according to the key specs
337: */
338: static struct bwstring *
339: cut_field(const struct bwstring *s, struct key_specs *ks)
340: {
341: struct bwstring *ret = NULL;
342:
343: if (s && ks) {
344: size_t field_start, key_end, key_start, sz;
345: bool empty_field, empty_key;
346:
347: field_start = 0;
348: key_start = 0;
349: empty_field = false;
350: empty_key = false;
351:
352: find_field_start(s, ks, &field_start, &key_start,
353: &empty_field, &empty_key);
354:
355: if (empty_key)
356: sz = 0;
357: else {
358: key_end = find_field_end(s, ks);
359: sz = (key_end < key_start) ? 0 : (key_end - key_start);
360: }
361:
362: ret = bwsalloc(sz);
363: if (sz)
364: bwsnocpy(ret, s, key_start, sz);
365: } else
366: ret = bwsalloc(0);
367:
368: return ret;
369: }
370:
371: /*
372: * Preprocesses a line applying the necessary transformations
373: * specified by command line options and returns the preprocessed
374: * string, which can be used to compare.
375: */
376: int
377: preproc(struct bwstring *s, struct keys_array *ka)
378: {
379: if (sort_opts_vals.kflag) {
380: size_t i;
381: for (i = 0; i < keys_num; i++) {
382: struct bwstring *key;
383: struct key_specs *kspecs;
384: struct sort_mods *sm;
385:
386: kspecs = &(keys[i]);
387: key = cut_field(s, kspecs);
388:
389: sm = &(kspecs->sm);
390: if (sm->dflag)
391: key = dictionary_order(key);
392: else if (sm->iflag)
393: key = ignore_nonprinting(key);
394: if (sm->fflag || sm->Mflag)
395: key = ignore_case(key);
396:
397: set_key_on_keys_array(ka, key, i);
398: }
399: } else {
400: struct bwstring *ret = NULL;
401: struct sort_mods *sm = default_sort_mods;
402:
1.9 millert 403: #ifdef GNUSORT_COMPATIBILITY
1.1 millert 404: if (sm->bflag) {
405: if (ret == NULL)
406: ret = bwsdup(s);
407: ret = ignore_leading_blanks(ret);
408: }
1.9 millert 409: #endif
1.1 millert 410: if (sm->dflag) {
411: if (ret == NULL)
412: ret = bwsdup(s);
413: ret = dictionary_order(ret);
414: } else if (sm->iflag) {
415: if (ret == NULL)
416: ret = bwsdup(s);
417: ret = ignore_nonprinting(ret);
418: }
419: if (sm->fflag || sm->Mflag) {
420: if (ret == NULL)
421: ret = bwsdup(s);
422: ret = ignore_case(ret);
423: }
424: if (ret == NULL)
425: set_key_on_keys_array(ka, s, 0);
426: else
427: set_key_on_keys_array(ka, ret, 0);
428: }
429:
430: return 0;
431: }
432:
433: cmpcoll_t
434: get_sort_func(struct sort_mods *sm)
435: {
436: if (sm->nflag)
437: return numcoll;
438: else if (sm->hflag)
439: return hnumcoll;
440: else if (sm->gflag)
441: return gnumcoll;
442: else if (sm->Mflag)
443: return monthcoll;
444: else if (sm->Rflag)
445: return randomcoll;
446: else if (sm->Vflag)
447: return versioncoll;
448: else
449: return wstrcoll;
450: }
451:
452: /*
453: * Compares the given strings. Returns a positive number if
454: * the first precedes the second, a negative number if the second is
455: * the preceding one, and zero if they are equal. This function calls
456: * the underlying collate functions, which done the actual comparison.
457: */
458: int
459: key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
460: {
461: struct sort_mods *sm;
462: int res = 0;
463: size_t i;
464:
465: for (i = 0; i < keys_num; ++i) {
466: sm = &(keys[i].sm);
467:
468: if (sm->rflag)
469: res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
470: else
471: res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
472:
473: if (res)
474: break;
475:
476: /* offset applies to only the first key */
477: offset = 0;
478: }
479: return res;
480: }
481:
482: /*
483: * Compare two strings.
484: * Plain symbol-by-symbol comparison.
485: */
486: int
487: top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
488: {
489: if (default_sort_mods->rflag) {
490: const struct bwstring *tmp;
491:
492: tmp = s1;
493: s1 = s2;
494: s2 = tmp;
495: }
496:
497: return bwscoll(s1, s2, 0);
498: }
499:
500: /*
501: * Compare a string and a sort list item, according to the sort specs.
502: */
503: int
504: str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
505: {
506: struct keys_array *ka1;
507: int ret = 0;
508:
509: ka1 = keys_array_alloc();
510:
511: preproc(str1, ka1);
512:
513: sort_list_item_make_key(*ss2);
514:
515: if (debug_sort) {
516: bwsprintf(stdout, str1, "; s1=<", ">");
517: bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
518: }
519:
520: ret = key_coll(ka1, &((*ss2)->ka), 0);
521:
522: if (debug_sort)
523: printf("; cmp1=%d", ret);
524:
525: clean_keys_array(str1, ka1);
526: sort_free(ka1);
527:
528: if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
529: ret = top_level_str_coll(str1, ((*ss2)->str));
530: if (debug_sort)
531: printf("; cmp2=%d", ret);
532: }
533:
534: if (debug_sort)
535: putchar('\n');
536:
537: return ret;
538: }
539:
540: /*
541: * Compare two sort list items, according to the sort specs.
542: */
543: int
544: list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
545: size_t offset)
546: {
547: int ret;
548:
549: ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
550:
551: if (debug_sort) {
552: if (offset)
1.11 mmcc 553: printf("; offset=%zu", offset);
1.1 millert 554: bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
555: bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
556: printf("; cmp1=%d\n", ret);
557: }
558:
559: if (ret)
560: return ret;
561:
562: if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
563: ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
564: if (debug_sort)
565: printf("; cmp2=%d\n", ret);
566: }
567:
568: return ret;
569: }
570:
571: /*
572: * Compare two sort list items, according to the sort specs.
573: */
574: int
575: list_coll(const void *ss1, const void *ss2)
576: {
577: return list_coll_offset((struct sort_list_item **)ss1,
578: (struct sort_list_item **)ss2, 0);
579: }
580:
1.8 deraadt 581: #define LSCDEF(N) \
582: static int \
1.1 millert 583: list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
584: { \
585: \
586: return list_coll_offset(ss1, ss2, N); \
587: }
588:
589: LSCDEF(0)
590: LSCDEF(1)
591: LSCDEF(2)
592: LSCDEF(3)
593: LSCDEF(4)
594: LSCDEF(5)
595: LSCDEF(6)
596: LSCDEF(7)
597: LSCDEF(8)
598: LSCDEF(9)
599: LSCDEF(10)
600: LSCDEF(11)
601: LSCDEF(12)
602: LSCDEF(13)
603: LSCDEF(14)
604: LSCDEF(15)
605: LSCDEF(16)
606: LSCDEF(17)
607: LSCDEF(18)
608: LSCDEF(19)
609: LSCDEF(20)
610:
611: listcoll_t
612: get_list_call_func(size_t offset)
613: {
614: static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
615: list_coll_2, list_coll_3, list_coll_4, list_coll_5,
616: list_coll_6, list_coll_7, list_coll_8, list_coll_9,
617: list_coll_10, list_coll_11, list_coll_12, list_coll_13,
618: list_coll_14, list_coll_15, list_coll_16, list_coll_17,
619: list_coll_18, list_coll_19, list_coll_20 };
620:
621: if (offset <= 20)
622: return lsarray[offset];
623:
624: return list_coll_0;
625: }
626:
627: /*
628: * Compare two sort list items, only by their original string.
629: */
630: int
631: list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
632: {
633: return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
634: }
635:
636: /*
637: * Maximum size of a number in the string (before or after decimal point)
638: */
639: #define MAX_NUM_SIZE (128)
640:
641: /*
642: * Set suffix value
643: */
644: static void
645: setsuffix(wchar_t c, unsigned char *si)
646: {
647: switch (c){
648: case L'k':
649: case L'K':
650: *si = 1;
651: break;
652: case L'M':
653: *si = 2;
654: break;
655: case L'G':
656: *si = 3;
657: break;
658: case L'T':
659: *si = 4;
660: break;
661: case L'P':
662: *si = 5;
663: break;
664: case L'E':
665: *si = 6;
666: break;
667: case L'Z':
668: *si = 7;
669: break;
670: case L'Y':
671: *si = 8;
672: break;
673: default:
674: *si = 0;
675: };
676: }
677:
678: /*
679: * Read string s and parse the string into a fixed-decimal-point number.
680: * sign equals -1 if the number is negative (explicit plus is not allowed,
681: * according to GNU sort's "info sort".
682: * The number part before decimal point is in the smain, after the decimal
683: * point is in sfrac, tail is the pointer to the remainder of the string.
684: */
685: static int
686: read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
687: {
688: bwstring_iterator s;
689:
690: s = bws_begin(s0);
691:
692: /* always end the fraction with zero, even if we have no fraction */
693: sfrac[0] = 0;
694:
695: while (iswblank(bws_get_iter_value(s)))
696: s = bws_iterator_inc(s, 1);
697:
1.12 ! schwarze 698: if (bws_get_iter_value(s) == L'-') {
1.1 millert 699: *sign = -1;
700: s = bws_iterator_inc(s, 1);
701: }
702:
703: // This is '0', not '\0', do not change this
704: while (iswdigit(bws_get_iter_value(s)) &&
705: (bws_get_iter_value(s) == L'0'))
706: s = bws_iterator_inc(s, 1);
707:
708: while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
709: if (iswdigit(bws_get_iter_value(s))) {
710: smain[*main_len] = bws_get_iter_value(s);
711: s = bws_iterator_inc(s, 1);
712: *main_len += 1;
1.12 ! schwarze 713: } else
1.1 millert 714: break;
715: }
716:
717: smain[*main_len] = 0;
718:
1.12 ! schwarze 719: if (bws_get_iter_value(s) == L'.') {
1.1 millert 720: s = bws_iterator_inc(s, 1);
721: while (iswdigit(bws_get_iter_value(s)) &&
722: *frac_len < MAX_NUM_SIZE) {
723: sfrac[*frac_len] = bws_get_iter_value(s);
724: s = bws_iterator_inc(s, 1);
725: *frac_len += 1;
726: }
727: sfrac[*frac_len] = 0;
728:
729: while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
730: --(*frac_len);
731: sfrac[*frac_len] = L'\0';
732: }
733: }
734:
735: setsuffix(bws_get_iter_value(s), si);
736:
737: if ((*main_len + *frac_len) == 0)
738: *sign = 0;
739:
740: return 0;
741: }
742:
743: /*
744: * Implements string sort.
745: */
746: static int
747: wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
748: {
749:
750: if (debug_sort) {
751: if (offset)
1.11 mmcc 752: printf("; offset=%zu\n", offset);
1.1 millert 753: bwsprintf(stdout, kv1->k, "; k1=<", ">");
754: printf("(%zu)", BWSLEN(kv1->k));
755: bwsprintf(stdout, kv2->k, ", k2=<", ">");
756: printf("(%zu)", BWSLEN(kv2->k));
757: }
758:
759: return bwscoll(kv1->k, kv2->k, offset);
760: }
761:
762: /*
763: * Compare two suffixes
764: */
765: static inline int
766: cmpsuffix(unsigned char si1, unsigned char si2)
767: {
768: return (char)si1 - (char)si2;
769: }
770:
771: /*
772: * Implements numeric sort for -n and -h.
773: */
774: static int
775: numcoll_impl(struct key_value *kv1, struct key_value *kv2,
776: size_t offset __unused, bool use_suffix)
777: {
778: struct bwstring *s1, *s2;
779: wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
780: wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
781: int cmp_res, sign1, sign2;
782: size_t frac1, frac2, main1, main2;
783: unsigned char SI1, SI2;
784: bool e1, e2, key1_read, key2_read;
785:
786: s1 = kv1->k;
787: s2 = kv2->k;
788: sign1 = sign2 = 0;
789: main1 = main2 = 0;
790: frac1 = frac2 = 0;
791:
792: key1_read = key2_read = false;
793:
794: if (debug_sort) {
795: bwsprintf(stdout, s1, "; k1=<", ">");
796: bwsprintf(stdout, s2, ", k2=<", ">");
797: }
798:
799: if (s1 == s2)
800: return 0;
801:
802: if (kv1->hint->status == HS_UNINITIALIZED) {
803: /* read the number from the string */
804: read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
805: key1_read = true;
806: kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
807: if (main1 < 1 && frac1 < 1)
808: kv1->hint->v.nh.empty=true;
809: kv1->hint->v.nh.si = SI1;
810: kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
811: HS_INITIALIZED : HS_ERROR;
812: kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
813: }
814:
815: if (kv2->hint->status == HS_UNINITIALIZED) {
816: /* read the number from the string */
817: read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
818: key2_read = true;
819: kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
820: if (main2 < 1 && frac2 < 1)
821: kv2->hint->v.nh.empty=true;
822: kv2->hint->v.nh.si = SI2;
823: kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
824: HS_INITIALIZED : HS_ERROR;
825: kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
826: }
827:
828: if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
829: HS_INITIALIZED) {
830: unsigned long long n1, n2;
831: bool neg1, neg2;
832:
833: e1 = kv1->hint->v.nh.empty;
834: e2 = kv2->hint->v.nh.empty;
835:
836: if (e1 && e2)
837: return 0;
838:
839: neg1 = kv1->hint->v.nh.neg;
840: neg2 = kv2->hint->v.nh.neg;
841:
842: if (neg1 && !neg2)
843: return -1;
844: if (neg2 && !neg1)
845: return 1;
846:
847: if (e1)
848: return neg2 ? 1 : -1;
849: else if (e2)
850: return neg1 ? -1 : 1;
851:
852:
853: if (use_suffix) {
854: cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
855: if (cmp_res)
856: return neg1 ? -cmp_res : cmp_res;
857: }
858:
859: n1 = kv1->hint->v.nh.n1;
860: n2 = kv2->hint->v.nh.n1;
861: if (n1 < n2)
862: return neg1 ? 1 : -1;
863: else if (n1 > n2)
864: return neg1 ? -1 : 1;
865: }
866:
867: /* read the numbers from the strings */
868: if (!key1_read)
869: read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
870: if (!key2_read)
871: read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
872:
873: e1 = ((main1 + frac1) == 0);
874: e2 = ((main2 + frac2) == 0);
875:
876: if (e1 && e2)
877: return 0;
878:
879: /* we know the result if the signs are different */
880: if (sign1 < 0 && sign2 >= 0)
881: return -1;
882: if (sign1 >= 0 && sign2 < 0)
883: return 1;
884:
885: if (e1)
886: return (sign2 < 0) ? +1 : -1;
887: else if (e2)
888: return (sign1 < 0) ? -1 : 1;
889:
890: if (use_suffix) {
891: cmp_res = cmpsuffix(SI1, SI2);
892: if (cmp_res)
893: return (sign1 < 0) ? -cmp_res : cmp_res;
894: }
895:
896: /* if both numbers are empty assume that the strings are equal */
897: if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
898: return 0;
899:
900: /*
901: * if the main part is of different size, we know the result
902: * (because the leading zeros are removed)
903: */
904: if (main1 < main2)
905: cmp_res = -1;
906: else if (main1 > main2)
907: cmp_res = +1;
908: /* if the sizes are equal then simple non-collate string compare gives the correct result */
909: else
910: cmp_res = wcscmp(smain1, smain2);
911:
912: /* check fraction */
913: if (!cmp_res)
914: cmp_res = wcscmp(sfrac1, sfrac2);
915:
916: if (!cmp_res)
917: return 0;
918:
919: /* reverse result if the signs are negative */
920: if (sign1 < 0 && sign2 < 0)
921: cmp_res = -cmp_res;
922:
923: return cmp_res;
924: }
925:
926: /*
927: * Implements numeric sort (-n).
928: */
929: static int
930: numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
931: {
932: return numcoll_impl(kv1, kv2, offset, false);
933: }
934:
935: /*
936: * Implements 'human' numeric sort (-h).
937: */
938: static int
939: hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
940: {
941: return numcoll_impl(kv1, kv2, offset, true);
942: }
943:
944: /*
945: * Implements random sort (-R).
946: */
947: static int
948: randomcoll(struct key_value *kv1, struct key_value *kv2,
949: size_t offset __unused)
950: {
951: struct bwstring *s1, *s2;
952: MD5_CTX ctx1, ctx2;
953: char *b1, *b2;
954:
955: s1 = kv1->k;
956: s2 = kv2->k;
957:
958: if (debug_sort) {
959: bwsprintf(stdout, s1, "; k1=<", ">");
960: bwsprintf(stdout, s2, ", k2=<", ">");
961: }
962:
963: if (s1 == s2)
964: return 0;
965:
966: memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
967: memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
968:
969: MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
970: MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
971: b1 = MD5End(&ctx1, NULL);
972: b2 = MD5End(&ctx2, NULL);
973: if (b1 == NULL) {
974: if (b2 == NULL)
975: return 0;
976: else {
977: sort_free(b2);
978: return -1;
979: }
980: } else if (b2 == NULL) {
981: sort_free(b1);
982: return 1;
983: } else {
984: int cmp_res;
985:
986: cmp_res = strcmp(b1, b2);
987: sort_free(b1);
988: sort_free(b2);
989:
990: if (!cmp_res)
991: cmp_res = bwscoll(s1, s2, 0);
992:
993: return cmp_res;
994: }
995: }
996:
997: /*
998: * Implements version sort (-V).
999: */
1000: static int
1001: versioncoll(struct key_value *kv1, struct key_value *kv2,
1002: size_t offset __unused)
1003: {
1004: struct bwstring *s1, *s2;
1005:
1006: s1 = kv1->k;
1007: s2 = kv2->k;
1008:
1009: if (debug_sort) {
1010: bwsprintf(stdout, s1, "; k1=<", ">");
1011: bwsprintf(stdout, s2, ", k2=<", ">");
1012: }
1013:
1014: if (s1 == s2)
1015: return 0;
1016:
1017: return vcmp(s1, s2);
1018: }
1019:
1020: /*
1021: * Check for minus infinity
1022: */
1023: static inline bool
1024: huge_minus(double d, int err1)
1025: {
1026: if (err1 == ERANGE)
1027: if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1028: return 1;
1029:
1030: return 0;
1031: }
1032:
1033: /*
1034: * Check for plus infinity
1035: */
1036: static inline bool
1037: huge_plus(double d, int err1)
1038: {
1039: if (err1 == ERANGE)
1040: if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1041: return 1;
1042:
1043: return 0;
1044: }
1045:
1046: /*
1047: * Check whether a function is a NAN
1048: */
1049: static bool
1050: is_nan(double d)
1051: {
1.3 miod 1052: #if defined(NAN)
1.1 millert 1053: return (d == NAN || isnan(d));
1.3 miod 1054: #else
1055: return (isnan(d));
1056: #endif
1.1 millert 1057: }
1058:
1059: /*
1060: * Compare two NANs
1061: */
1062: static int
1063: cmp_nans(double d1, double d2)
1064: {
1065: if (d1 == d2)
1066: return 0;
1067: return d1 < d2 ? -1 : 1;
1068: }
1069:
1070: /*
1071: * Implements general numeric sort (-g).
1072: */
1073: static int
1074: gnumcoll(struct key_value *kv1, struct key_value *kv2,
1075: size_t offset __unused)
1076: {
1077: double d1, d2;
1078: int err1, err2;
1079: bool empty1, empty2, key1_read, key2_read;
1080:
1081: d1 = d2 = 0;
1082: err1 = err2 = 0;
1083: key1_read = key2_read = false;
1084:
1085: if (debug_sort) {
1086: bwsprintf(stdout, kv1->k, "; k1=<", ">");
1087: bwsprintf(stdout, kv2->k, "; k2=<", ">");
1088: }
1089:
1090: if (kv1->hint->status == HS_UNINITIALIZED) {
1091: errno = 0;
1092: d1 = bwstod(kv1->k, &empty1);
1093: err1 = errno;
1094:
1.10 millert 1095: if (empty1) {
1.1 millert 1096: kv1->hint->v.gh.notnum = true;
1.10 millert 1097: kv1->hint->status = HS_INITIALIZED;
1098: } else if (err1 == 0) {
1.1 millert 1099: kv1->hint->v.gh.d = d1;
1100: kv1->hint->v.gh.nan = is_nan(d1);
1101: kv1->hint->status = HS_INITIALIZED;
1102: } else
1103: kv1->hint->status = HS_ERROR;
1104:
1105: key1_read = true;
1106: }
1107:
1108: if (kv2->hint->status == HS_UNINITIALIZED) {
1109: errno = 0;
1110: d2 = bwstod(kv2->k, &empty2);
1111: err2 = errno;
1112:
1.10 millert 1113: if (empty2) {
1.1 millert 1114: kv2->hint->v.gh.notnum = true;
1.10 millert 1115: kv2->hint->status = HS_INITIALIZED;
1116: } else if (err2 == 0) {
1.1 millert 1117: kv2->hint->v.gh.d = d2;
1118: kv2->hint->v.gh.nan = is_nan(d2);
1119: kv2->hint->status = HS_INITIALIZED;
1120: } else
1121: kv2->hint->status = HS_ERROR;
1122:
1123: key2_read = true;
1124: }
1125:
1126: if (kv1->hint->status == HS_INITIALIZED &&
1127: kv2->hint->status == HS_INITIALIZED) {
1.10 millert 1128: #ifdef GNUSORT_COMPATIBILITY
1.1 millert 1129: if (kv1->hint->v.gh.notnum)
1130: return kv2->hint->v.gh.notnum ? 0 : -1;
1131: else if (kv2->hint->v.gh.notnum)
1132: return 1;
1.10 millert 1133: #else
1134: if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum)
1135: return 0;
1136: #endif
1.1 millert 1137:
1138: if (kv1->hint->v.gh.nan)
1139: return kv2->hint->v.gh.nan ?
1140: cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
1141: else if (kv2->hint->v.gh.nan)
1142: return 1;
1143:
1144: d1 = kv1->hint->v.gh.d;
1145: d2 = kv2->hint->v.gh.d;
1146:
1147: if (d1 < d2)
1148: return -1;
1149: else if (d1 > d2)
1150: return 1;
1151: else
1152: return 0;
1153: }
1154:
1155: if (!key1_read) {
1156: errno = 0;
1157: d1 = bwstod(kv1->k, &empty1);
1158: err1 = errno;
1159: }
1160:
1161: if (!key2_read) {
1162: errno = 0;
1163: d2 = bwstod(kv2->k, &empty2);
1164: err2 = errno;
1165: }
1166:
1.10 millert 1167: /* Non-value case */
1168: #ifdef GNUSORT_COMPATIBILITY
1.1 millert 1169: if (empty1)
1170: return empty2 ? 0 : -1;
1171: else if (empty2)
1172: return 1;
1.10 millert 1173: #else
1174: if (empty1 && empty2)
1175: return 0;
1176: #endif
1.1 millert 1177:
1178: /* NAN case */
1179: if (is_nan(d1))
1180: return is_nan(d2) ? cmp_nans(d1, d2) : -1;
1181: else if (is_nan(d2))
1182: return 1;
1183:
1184: /* Infinities */
1185: if (err1 == ERANGE || err2 == ERANGE) {
1186: /* Minus infinity case */
1187: if (huge_minus(d1, err1)) {
1188: if (huge_minus(d2, err2)) {
1189: if (d1 == d2)
1190: return 0;
1191: return d1 < d2 ? -1 : 1;
1192: } else
1193: return -1;
1194:
1195: } else if (huge_minus(d2, err2)) {
1196: if (huge_minus(d1, err1)) {
1197: if (d1 == d2)
1198: return 0;
1199: return d1 < d2 ? -1 : 1;
1200: } else
1201: return 1;
1202: }
1203:
1204: /* Plus infinity case */
1205: if (huge_plus(d1, err1)) {
1206: if (huge_plus(d2, err2)) {
1207: if (d1 == d2)
1208: return 0;
1209: return d1 < d2 ? -1 : 1;
1210: } else
1211: return 1;
1212: } else if (huge_plus(d2, err2)) {
1213: if (huge_plus(d1, err1)) {
1214: if (d1 == d2)
1215: return 0;
1216: return d1 < d2 ? -1 : 1;
1217: } else
1218: return -1;
1219: }
1220: }
1221:
1222: if (d1 == d2)
1223: return 0;
1224: return d1 < d2 ? -1 : 1;
1225: }
1226:
1227: /*
1228: * Implements month sort (-M).
1229: */
1230: static int
1231: monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1232: {
1233: int val1, val2;
1234: bool key1_read, key2_read;
1235:
1236: val1 = val2 = 0;
1237: key1_read = key2_read = false;
1238:
1239: if (debug_sort) {
1240: bwsprintf(stdout, kv1->k, "; k1=<", ">");
1241: bwsprintf(stdout, kv2->k, "; k2=<", ">");
1242: }
1243:
1244: if (kv1->hint->status == HS_UNINITIALIZED) {
1245: kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1246: key1_read = true;
1247: kv1->hint->status = HS_INITIALIZED;
1248: }
1249:
1250: if (kv2->hint->status == HS_UNINITIALIZED) {
1251: kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1252: key2_read = true;
1253: kv2->hint->status = HS_INITIALIZED;
1254: }
1255:
1256: if (kv1->hint->status == HS_INITIALIZED) {
1257: val1 = kv1->hint->v.Mh.m;
1258: key1_read = true;
1259: }
1260:
1261: if (kv2->hint->status == HS_INITIALIZED) {
1262: val2 = kv2->hint->v.Mh.m;
1263: key2_read = true;
1264: }
1265:
1266: if (!key1_read)
1267: val1 = bws_month_score(kv1->k);
1268: if (!key2_read)
1269: val2 = bws_month_score(kv2->k);
1270:
1271: if (val1 == val2)
1272: return 0;
1273: return val1 < val2 ? -1 : 1;
1274: }