src/usr.bin/sort/coll.c - annotate

Return to coll.c CVS log
Up to [local] / src / usr.bin / sort
Annotation of src/usr.bin/sort/coll.c, Revision 1.3

1.3     ! miod        1: /*     $OpenBSD: coll.c,v 1.2 2015/03/17 17:49:27 millert Exp $        */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
                      5:  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29:
                     30: #include <sys/types.h>
                     31:
                     32: #include <errno.h>
                     33: #include <err.h>
                     34: #include <langinfo.h>
                     35: #include <limits.h>
                     36: #include <math.h>
                     37: #include <md5.h>
                     38: #include <stdlib.h>
                     39: #include <string.h>
                     40: #include <wchar.h>
                     41: #include <wctype.h>
                     42:
                     43: #include "coll.h"
                     44: #include "vsort.h"
                     45:
                     46: struct key_specs *keys;
                     47: size_t keys_num = 0;
                     48:
                     49: wint_t symbol_decimal_point = L'.';
                     50: /* there is no default thousands separator in collate rules: */
                     51: wint_t symbol_thousands_sep = 0;
                     52: wint_t symbol_negative_sign = L'-';
                     53: wint_t symbol_positive_sign = L'+';
                     54:
                     55: static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
                     56: static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
                     57: static int monthcoll(struct key_value*, struct key_value *, size_t offset);
                     58: static int numcoll(struct key_value*, struct key_value *, size_t offset);
                     59: static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
                     60: static int randomcoll(struct key_value*, struct key_value *, size_t offset);
                     61: static int versioncoll(struct key_value*, struct key_value *, size_t offset);
                     62:
                     63: /*
                     64:  * Allocate keys array
                     65:  */
                     66: struct keys_array *
                     67: keys_array_alloc(void)
                     68: {
                     69:        struct keys_array *ka;
                     70:        size_t sz;
                     71:
                     72:        sz = keys_array_size();
1.2       millert    73:        ka = sort_calloc(1, sz);
1.1       millert    74:
                     75:        return ka;
                     76: }
                     77:
                     78: /*
                     79:  * Calculate whether we need key hint space
                     80:  */
                     81: static size_t
                     82: key_hint_size(void)
                     83: {
                     84:
                     85:        return need_hint ? sizeof(struct key_hint) : 0;
                     86: }
                     87:
                     88: /*
                     89:  * Calculate keys array size
                     90:  */
                     91: size_t
                     92: keys_array_size(void)
                     93: {
                     94:
                     95:        return keys_num * (sizeof(struct key_value) + key_hint_size());
                     96: }
                     97:
                     98: /*
                     99:  * Clean data of keys array
                    100:  */
                    101: void
                    102: clean_keys_array(const struct bwstring *s, struct keys_array *ka)
                    103: {
                    104:
                    105:        if (ka) {
                    106:                size_t i;
                    107:
                    108:                for (i = 0; i < keys_num; ++i)
                    109:                        if (ka->key[i].k && ka->key[i].k != s)
                    110:                                bwsfree(ka->key[i].k);
                    111:                memset(ka, 0, keys_array_size());
                    112:        }
                    113: }
                    114:
                    115: /*
                    116:  * Set value of a key in the keys set
                    117:  */
                    118: void
                    119: set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
                    120: {
                    121:
                    122:        if (ka && keys_num > ind) {
                    123:                struct key_value *kv;
                    124:
                    125:                kv = &(ka->key[ind]);
                    126:
                    127:                if (kv->k && kv->k != s)
                    128:                        bwsfree(kv->k);
                    129:                kv->k = s;
                    130:        }
                    131: }
                    132:
                    133: /*
                    134:  * Initialize a sort list item
                    135:  */
                    136: struct sort_list_item *
                    137: sort_list_item_alloc(void)
                    138: {
                    139:        struct sort_list_item *si;
                    140:        size_t sz;
                    141:
                    142:        sz = sizeof(struct sort_list_item) + keys_array_size();
1.2       millert   143:        si = sort_calloc(1, sz);
1.1       millert   144:
                    145:        return si;
                    146: }
                    147:
                    148: size_t
                    149: sort_list_item_size(struct sort_list_item *si)
                    150: {
                    151:        size_t i, ret = 0;
                    152:
                    153:        if (si) {
                    154:                ret = sizeof(struct sort_list_item) + keys_array_size();
                    155:                if (si->str)
                    156:                        ret += bws_memsize(si->str);
                    157:                for (i = 0; i < keys_num; ++i) {
                    158:                        struct key_value *kv;
                    159:
                    160:                        kv = &(si->ka.key[i]);
                    161:
                    162:                        if (kv->k != si->str)
                    163:                                ret += bws_memsize(kv->k);
                    164:                }
                    165:        }
                    166:        return ret;
                    167: }
                    168:
                    169: /*
                    170:  * Calculate key for a sort list item
                    171:  */
                    172: static void
                    173: sort_list_item_make_key(struct sort_list_item *si)
                    174: {
                    175:
                    176:        preproc(si->str, &(si->ka));
                    177: }
                    178:
                    179: /*
                    180:  * Set value of a sort list item.
                    181:  * Return combined string and keys memory size.
                    182:  */
                    183: void
                    184: sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
                    185: {
                    186:
                    187:        if (si) {
                    188:                clean_keys_array(si->str, &(si->ka));
                    189:                if (si->str) {
                    190:                        if (si->str == str) {
                    191:                                /* we are trying to reset the same string */
                    192:                                return;
                    193:                        } else {
                    194:                                bwsfree(si->str);
                    195:                                si->str = NULL;
                    196:                        }
                    197:                }
                    198:                si->str = str;
                    199:                sort_list_item_make_key(si);
                    200:        }
                    201: }
                    202:
                    203: /*
                    204:  * De-allocate a sort list item object memory
                    205:  */
                    206: void
                    207: sort_list_item_clean(struct sort_list_item *si)
                    208: {
                    209:
                    210:        if (si) {
                    211:                clean_keys_array(si->str, &(si->ka));
                    212:                if (si->str) {
                    213:                        bwsfree(si->str);
                    214:                        si->str = NULL;
                    215:                }
                    216:        }
                    217: }
                    218:
                    219: /*
                    220:  * Skip columns according to specs
                    221:  */
                    222: static size_t
                    223: skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
                    224:     bool skip_blanks, bool *empty_key)
                    225: {
                    226:        if (cols < 1)
                    227:                return BWSLEN(s) + 1;
                    228:
                    229:        if (skip_blanks)
                    230:                while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
                    231:                        ++start;
                    232:
                    233:        while (start < BWSLEN(s) && cols > 1) {
                    234:                --cols;
                    235:                ++start;
                    236:        }
                    237:
                    238:        if (start >= BWSLEN(s))
                    239:                *empty_key = true;
                    240:
                    241:        return start;
                    242: }
                    243:
                    244: /*
                    245:  * Skip fields according to specs
                    246:  */
                    247: static size_t
                    248: skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
                    249: {
                    250:
                    251:        if (fields < 2) {
                    252:                if (BWSLEN(s) == 0)
                    253:                        *empty_field = true;
                    254:                return 0;
                    255:        } else if (!(sort_opts_vals.tflag)) {
                    256:                size_t cpos = 0;
                    257:                bool pb = true;
                    258:
                    259:                while (cpos < BWSLEN(s)) {
                    260:                        bool isblank;
                    261:
                    262:                        isblank = iswblank(BWS_GET(s, cpos));
                    263:
                    264:                        if (isblank && !pb) {
                    265:                                --fields;
                    266:                                if (fields <= 1)
                    267:                                        return cpos;
                    268:                        }
                    269:                        pb = isblank;
                    270:                        ++cpos;
                    271:                }
                    272:                if (fields > 1)
                    273:                        *empty_field = true;
                    274:                return cpos;
                    275:        } else {
                    276:                size_t cpos = 0;
                    277:
                    278:                while (cpos < BWSLEN(s)) {
                    279:                        if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
                    280:                                --fields;
                    281:                                if (fields <= 1)
                    282:                                        return cpos + 1;
                    283:                        }
                    284:                        ++cpos;
                    285:                }
                    286:                if (fields > 1)
                    287:                        *empty_field = true;
                    288:                return cpos;
                    289:        }
                    290: }
                    291:
                    292: /*
                    293:  * Find fields start
                    294:  */
                    295: static void
                    296: find_field_start(const struct bwstring *s, struct key_specs *ks,
                    297:     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
                    298: {
                    299:
                    300:        *field_start = skip_fields_to_start(s, ks->f1, empty_field);
                    301:        if (!*empty_field)
                    302:                *key_start = skip_cols_to_start(s, ks->c1, *field_start,
                    303:                    ks->pos1b, empty_key);
                    304:        else
                    305:                *empty_key = true;
                    306: }
                    307:
                    308: /*
                    309:  * Find end key position
                    310:  */
                    311: static size_t
                    312: find_field_end(const struct bwstring *s, struct key_specs *ks)
                    313: {
                    314:        size_t f2, next_field_start, pos_end;
                    315:        bool empty_field, empty_key;
                    316:
                    317:        empty_field = false;
                    318:        empty_key = false;
                    319:        f2 = ks->f2;
                    320:
                    321:        if (f2 == 0)
                    322:                return BWSLEN(s) + 1;
                    323:        else {
                    324:                if (ks->c2 == 0) {
                    325:                        next_field_start = skip_fields_to_start(s, f2 + 1,
                    326:                            &empty_field);
                    327:                        if ((next_field_start > 0) && sort_opts_vals.tflag &&
                    328:                            ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
                    329:                            next_field_start - 1)))
                    330:                                --next_field_start;
                    331:                } else
                    332:                        next_field_start = skip_fields_to_start(s, f2,
                    333:                            &empty_field);
                    334:        }
                    335:
                    336:        if (empty_field || (next_field_start >= BWSLEN(s)))
                    337:                return BWSLEN(s) + 1;
                    338:
                    339:        if (ks->c2) {
                    340:                pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
                    341:                    ks->pos2b, &empty_key);
                    342:                if (pos_end < BWSLEN(s))
                    343:                        ++pos_end;
                    344:        } else
                    345:                pos_end = next_field_start;
                    346:
                    347:        return pos_end;
                    348: }
                    349:
                    350: /*
                    351:  * Cut a field according to the key specs
                    352:  */
                    353: static struct bwstring *
                    354: cut_field(const struct bwstring *s, struct key_specs *ks)
                    355: {
                    356:        struct bwstring *ret = NULL;
                    357:
                    358:        if (s && ks) {
                    359:                size_t field_start, key_end, key_start, sz;
                    360:                bool empty_field, empty_key;
                    361:
                    362:                field_start = 0;
                    363:                key_start = 0;
                    364:                empty_field = false;
                    365:                empty_key = false;
                    366:
                    367:                find_field_start(s, ks, &field_start, &key_start,
                    368:                    &empty_field, &empty_key);
                    369:
                    370:                if (empty_key)
                    371:                        sz = 0;
                    372:                else {
                    373:                        key_end = find_field_end(s, ks);
                    374:                        sz = (key_end < key_start) ? 0 : (key_end - key_start);
                    375:                }
                    376:
                    377:                ret = bwsalloc(sz);
                    378:                if (sz)
                    379:                        bwsnocpy(ret, s, key_start, sz);
                    380:        } else
                    381:                ret = bwsalloc(0);
                    382:
                    383:        return ret;
                    384: }
                    385:
                    386: /*
                    387:  * Preprocesses a line applying the necessary transformations
                    388:  * specified by command line options and returns the preprocessed
                    389:  * string, which can be used to compare.
                    390:  */
                    391: int
                    392: preproc(struct bwstring *s, struct keys_array *ka)
                    393: {
                    394:
                    395:        if (sort_opts_vals.kflag) {
                    396:                size_t i;
                    397:                for (i = 0; i < keys_num; i++) {
                    398:                        struct bwstring *key;
                    399:                        struct key_specs *kspecs;
                    400:                        struct sort_mods *sm;
                    401:
                    402:                        kspecs = &(keys[i]);
                    403:                        key = cut_field(s, kspecs);
                    404:
                    405:                        sm = &(kspecs->sm);
                    406:                        if (sm->dflag)
                    407:                                key = dictionary_order(key);
                    408:                        else if (sm->iflag)
                    409:                                key = ignore_nonprinting(key);
                    410:                        if (sm->fflag || sm->Mflag)
                    411:                                key = ignore_case(key);
                    412:
                    413:                        set_key_on_keys_array(ka, key, i);
                    414:                }
                    415:        } else {
                    416:                struct bwstring *ret = NULL;
                    417:                struct sort_mods *sm = default_sort_mods;
                    418:
                    419:                if (sm->bflag) {
                    420:                        if (ret == NULL)
                    421:                                ret = bwsdup(s);
                    422:                        ret = ignore_leading_blanks(ret);
                    423:                }
                    424:                if (sm->dflag) {
                    425:                        if (ret == NULL)
                    426:                                ret = bwsdup(s);
                    427:                        ret = dictionary_order(ret);
                    428:                } else if (sm->iflag) {
                    429:                        if (ret == NULL)
                    430:                                ret = bwsdup(s);
                    431:                        ret = ignore_nonprinting(ret);
                    432:                }
                    433:                if (sm->fflag || sm->Mflag) {
                    434:                        if (ret == NULL)
                    435:                                ret = bwsdup(s);
                    436:                        ret = ignore_case(ret);
                    437:                }
                    438:                if (ret == NULL)
                    439:                        set_key_on_keys_array(ka, s, 0);
                    440:                else
                    441:                        set_key_on_keys_array(ka, ret, 0);
                    442:        }
                    443:
                    444:        return 0;
                    445: }
                    446:
                    447: cmpcoll_t
                    448: get_sort_func(struct sort_mods *sm)
                    449: {
                    450:
                    451:        if (sm->nflag)
                    452:                return numcoll;
                    453:        else if (sm->hflag)
                    454:                return hnumcoll;
                    455:        else if (sm->gflag)
                    456:                return gnumcoll;
                    457:        else if (sm->Mflag)
                    458:                return monthcoll;
                    459:        else if (sm->Rflag)
                    460:                return randomcoll;
                    461:        else if (sm->Vflag)
                    462:                return versioncoll;
                    463:        else
                    464:                return wstrcoll;
                    465: }
                    466:
                    467: /*
                    468:  * Compares the given strings.  Returns a positive number if
                    469:  * the first precedes the second, a negative number if the second is
                    470:  * the preceding one, and zero if they are equal.  This function calls
                    471:  * the underlying collate functions, which done the actual comparison.
                    472:  */
                    473: int
                    474: key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
                    475: {
                    476:        struct sort_mods *sm;
                    477:        int res = 0;
                    478:        size_t i;
                    479:
                    480:        for (i = 0; i < keys_num; ++i) {
                    481:                sm = &(keys[i].sm);
                    482:
                    483:                if (sm->rflag)
                    484:                        res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
                    485:                else
                    486:                        res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
                    487:
                    488:                if (res)
                    489:                        break;
                    490:
                    491:                /* offset applies to only the first key */
                    492:                offset = 0;
                    493:        }
                    494:        return res;
                    495: }
                    496:
                    497: /*
                    498:  * Compare two strings.
                    499:  * Plain symbol-by-symbol comparison.
                    500:  */
                    501: int
                    502: top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
                    503: {
                    504:
                    505:        if (default_sort_mods->rflag) {
                    506:                const struct bwstring *tmp;
                    507:
                    508:                tmp = s1;
                    509:                s1 = s2;
                    510:                s2 = tmp;
                    511:        }
                    512:
                    513:        return bwscoll(s1, s2, 0);
                    514: }
                    515:
                    516: /*
                    517:  * Compare a string and a sort list item, according to the sort specs.
                    518:  */
                    519: int
                    520: str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
                    521: {
                    522:        struct keys_array *ka1;
                    523:        int ret = 0;
                    524:
                    525:        ka1 = keys_array_alloc();
                    526:
                    527:        preproc(str1, ka1);
                    528:
                    529:        sort_list_item_make_key(*ss2);
                    530:
                    531:        if (debug_sort) {
                    532:                bwsprintf(stdout, str1, "; s1=<", ">");
                    533:                bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
                    534:        }
                    535:
                    536:        ret = key_coll(ka1, &((*ss2)->ka), 0);
                    537:
                    538:        if (debug_sort)
                    539:                printf("; cmp1=%d", ret);
                    540:
                    541:        clean_keys_array(str1, ka1);
                    542:        sort_free(ka1);
                    543:
                    544:        if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
                    545:                ret = top_level_str_coll(str1, ((*ss2)->str));
                    546:                if (debug_sort)
                    547:                        printf("; cmp2=%d", ret);
                    548:        }
                    549:
                    550:        if (debug_sort)
                    551:                putchar('\n');
                    552:
                    553:        return ret;
                    554: }
                    555:
                    556: /*
                    557:  * Compare two sort list items, according to the sort specs.
                    558:  */
                    559: int
                    560: list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
                    561:     size_t offset)
                    562: {
                    563:        int ret;
                    564:
                    565:        ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
                    566:
                    567:        if (debug_sort) {
                    568:                if (offset)
                    569:                        printf("; offset=%d", (int) offset);
                    570:                bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
                    571:                bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
                    572:                printf("; cmp1=%d\n", ret);
                    573:        }
                    574:
                    575:        if (ret)
                    576:                return ret;
                    577:
                    578:        if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
                    579:                ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
                    580:                if (debug_sort)
                    581:                        printf("; cmp2=%d\n", ret);
                    582:        }
                    583:
                    584:        return ret;
                    585: }
                    586:
                    587: /*
                    588:  * Compare two sort list items, according to the sort specs.
                    589:  */
                    590: int
                    591: list_coll(const void *ss1, const void *ss2)
                    592: {
                    593:
                    594:        return list_coll_offset((struct sort_list_item **)ss1,
                    595:            (struct sort_list_item **)ss2, 0);
                    596: }
                    597:
                    598: #define LSCDEF(N)                                                                                      \
                    599: static int                                                                                             \
                    600: list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)                                        \
                    601: {                                                                                                      \
                    602:                                                                                                        \
                    603:        return list_coll_offset(ss1, ss2, N);                                                           \
                    604: }
                    605:
                    606: LSCDEF(0)
                    607: LSCDEF(1)
                    608: LSCDEF(2)
                    609: LSCDEF(3)
                    610: LSCDEF(4)
                    611: LSCDEF(5)
                    612: LSCDEF(6)
                    613: LSCDEF(7)
                    614: LSCDEF(8)
                    615: LSCDEF(9)
                    616: LSCDEF(10)
                    617: LSCDEF(11)
                    618: LSCDEF(12)
                    619: LSCDEF(13)
                    620: LSCDEF(14)
                    621: LSCDEF(15)
                    622: LSCDEF(16)
                    623: LSCDEF(17)
                    624: LSCDEF(18)
                    625: LSCDEF(19)
                    626: LSCDEF(20)
                    627:
                    628: listcoll_t
                    629: get_list_call_func(size_t offset)
                    630: {
                    631:        static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
                    632:            list_coll_2, list_coll_3, list_coll_4, list_coll_5,
                    633:            list_coll_6, list_coll_7, list_coll_8, list_coll_9,
                    634:            list_coll_10, list_coll_11, list_coll_12, list_coll_13,
                    635:            list_coll_14, list_coll_15, list_coll_16, list_coll_17,
                    636:            list_coll_18, list_coll_19, list_coll_20 };
                    637:
                    638:        if (offset <= 20)
                    639:                return lsarray[offset];
                    640:
                    641:        return list_coll_0;
                    642: }
                    643:
                    644: /*
                    645:  * Compare two sort list items, only by their original string.
                    646:  */
                    647: int
                    648: list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
                    649: {
                    650:
                    651:        return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
                    652: }
                    653:
                    654: /*
                    655:  * Maximum size of a number in the string (before or after decimal point)
                    656:  */
                    657: #define MAX_NUM_SIZE (128)
                    658:
                    659: /*
                    660:  * Set suffix value
                    661:  */
                    662: static void
                    663: setsuffix(wchar_t c, unsigned char *si)
                    664: {
                    665:        switch (c){
                    666:        case L'k':
                    667:        case L'K':
                    668:                *si = 1;
                    669:                break;
                    670:        case L'M':
                    671:                *si = 2;
                    672:                break;
                    673:        case L'G':
                    674:                *si = 3;
                    675:                break;
                    676:        case L'T':
                    677:                *si = 4;
                    678:                break;
                    679:        case L'P':
                    680:                *si = 5;
                    681:                break;
                    682:        case L'E':
                    683:                *si = 6;
                    684:                break;
                    685:        case L'Z':
                    686:                *si = 7;
                    687:                break;
                    688:        case L'Y':
                    689:                *si = 8;
                    690:                break;
                    691:        default:
                    692:                *si = 0;
                    693:        };
                    694: }
                    695:
                    696: /*
                    697:  * Read string s and parse the string into a fixed-decimal-point number.
                    698:  * sign equals -1 if the number is negative (explicit plus is not allowed,
                    699:  * according to GNU sort's "info sort".
                    700:  * The number part before decimal point is in the smain, after the decimal
                    701:  * point is in sfrac, tail is the pointer to the remainder of the string.
                    702:  */
                    703: static int
                    704: read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
                    705: {
                    706:        bwstring_iterator s;
                    707:
                    708:        s = bws_begin(s0);
                    709:
                    710:        /* always end the fraction with zero, even if we have no fraction */
                    711:        sfrac[0] = 0;
                    712:
                    713:        while (iswblank(bws_get_iter_value(s)))
                    714:                s = bws_iterator_inc(s, 1);
                    715:
                    716:        if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
                    717:                *sign = -1;
                    718:                s = bws_iterator_inc(s, 1);
                    719:        }
                    720:
                    721:        // This is '0', not '\0', do not change this
                    722:        while (iswdigit(bws_get_iter_value(s)) &&
                    723:            (bws_get_iter_value(s) == L'0'))
                    724:                s = bws_iterator_inc(s, 1);
                    725:
                    726:        while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
                    727:                if (iswdigit(bws_get_iter_value(s))) {
                    728:                        smain[*main_len] = bws_get_iter_value(s);
                    729:                        s = bws_iterator_inc(s, 1);
                    730:                        *main_len += 1;
                    731:                } else if (symbol_thousands_sep &&
                    732:                    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
                    733:                        s = bws_iterator_inc(s, 1);
                    734:                else
                    735:                        break;
                    736:        }
                    737:
                    738:        smain[*main_len] = 0;
                    739:
                    740:        if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
                    741:                s = bws_iterator_inc(s, 1);
                    742:                while (iswdigit(bws_get_iter_value(s)) &&
                    743:                    *frac_len < MAX_NUM_SIZE) {
                    744:                        sfrac[*frac_len] = bws_get_iter_value(s);
                    745:                        s = bws_iterator_inc(s, 1);
                    746:                        *frac_len += 1;
                    747:                }
                    748:                sfrac[*frac_len] = 0;
                    749:
                    750:                while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
                    751:                        --(*frac_len);
                    752:                        sfrac[*frac_len] = L'\0';
                    753:                }
                    754:        }
                    755:
                    756:        setsuffix(bws_get_iter_value(s), si);
                    757:
                    758:        if ((*main_len + *frac_len) == 0)
                    759:                *sign = 0;
                    760:
                    761:        return 0;
                    762: }
                    763:
                    764: /*
                    765:  * Implements string sort.
                    766:  */
                    767: static int
                    768: wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
                    769: {
                    770:
                    771:        if (debug_sort) {
                    772:                if (offset)
                    773:                        printf("; offset=%d\n", (int) offset);
                    774:                bwsprintf(stdout, kv1->k, "; k1=<", ">");
                    775:                printf("(%zu)", BWSLEN(kv1->k));
                    776:                bwsprintf(stdout, kv2->k, ", k2=<", ">");
                    777:                printf("(%zu)", BWSLEN(kv2->k));
                    778:        }
                    779:
                    780:        return bwscoll(kv1->k, kv2->k, offset);
                    781: }
                    782:
                    783: /*
                    784:  * Compare two suffixes
                    785:  */
                    786: static inline int
                    787: cmpsuffix(unsigned char si1, unsigned char si2)
                    788: {
                    789:
                    790:        return (char)si1 - (char)si2;
                    791: }
                    792:
                    793: /*
                    794:  * Implements numeric sort for -n and -h.
                    795:  */
                    796: static int
                    797: numcoll_impl(struct key_value *kv1, struct key_value *kv2,
                    798:     size_t offset __unused, bool use_suffix)
                    799: {
                    800:        struct bwstring *s1, *s2;
                    801:        wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
                    802:        wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
                    803:        int cmp_res, sign1, sign2;
                    804:        size_t frac1, frac2, main1, main2;
                    805:        unsigned char SI1, SI2;
                    806:        bool e1, e2, key1_read, key2_read;
                    807:
                    808:        s1 = kv1->k;
                    809:        s2 = kv2->k;
                    810:        sign1 = sign2 = 0;
                    811:        main1 = main2 = 0;
                    812:        frac1 = frac2 = 0;
                    813:
                    814:        key1_read = key2_read = false;
                    815:
                    816:        if (debug_sort) {
                    817:                bwsprintf(stdout, s1, "; k1=<", ">");
                    818:                bwsprintf(stdout, s2, ", k2=<", ">");
                    819:        }
                    820:
                    821:        if (s1 == s2)
                    822:                return 0;
                    823:
                    824:        if (kv1->hint->status == HS_UNINITIALIZED) {
                    825:                /* read the number from the string */
                    826:                read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
                    827:                key1_read = true;
                    828:                kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
                    829:                if (main1 < 1 && frac1 < 1)
                    830:                        kv1->hint->v.nh.empty=true;
                    831:                kv1->hint->v.nh.si = SI1;
                    832:                kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
                    833:                    HS_INITIALIZED : HS_ERROR;
                    834:                kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
                    835:        }
                    836:
                    837:        if (kv2->hint->status == HS_UNINITIALIZED) {
                    838:                /* read the number from the string */
                    839:                read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
                    840:                key2_read = true;
                    841:                kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
                    842:                if (main2 < 1 && frac2 < 1)
                    843:                        kv2->hint->v.nh.empty=true;
                    844:                kv2->hint->v.nh.si = SI2;
                    845:                kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
                    846:                    HS_INITIALIZED : HS_ERROR;
                    847:                kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
                    848:        }
                    849:
                    850:        if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
                    851:            HS_INITIALIZED) {
                    852:                unsigned long long n1, n2;
                    853:                bool neg1, neg2;
                    854:
                    855:                e1 = kv1->hint->v.nh.empty;
                    856:                e2 = kv2->hint->v.nh.empty;
                    857:
                    858:                if (e1 && e2)
                    859:                        return 0;
                    860:
                    861:                neg1 = kv1->hint->v.nh.neg;
                    862:                neg2 = kv2->hint->v.nh.neg;
                    863:
                    864:                if (neg1 && !neg2)
                    865:                        return -1;
                    866:                if (neg2 && !neg1)
                    867:                        return 1;
                    868:
                    869:                if (e1)
                    870:                        return neg2 ? 1 : -1;
                    871:                else if (e2)
                    872:                        return neg1 ? -1 : 1;
                    873:
                    874:
                    875:                if (use_suffix) {
                    876:                        cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
                    877:                        if (cmp_res)
                    878:                                return neg1 ? -cmp_res : cmp_res;
                    879:                }
                    880:
                    881:                n1 = kv1->hint->v.nh.n1;
                    882:                n2 = kv2->hint->v.nh.n1;
                    883:                if (n1 < n2)
                    884:                        return neg1 ? 1 : -1;
                    885:                else if (n1 > n2)
                    886:                        return neg1 ? -1 : 1;
                    887:        }
                    888:
                    889:        /* read the numbers from the strings */
                    890:        if (!key1_read)
                    891:                read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
                    892:        if (!key2_read)
                    893:                read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
                    894:
                    895:        e1 = ((main1 + frac1) == 0);
                    896:        e2 = ((main2 + frac2) == 0);
                    897:
                    898:        if (e1 && e2)
                    899:                return 0;
                    900:
                    901:        /* we know the result if the signs are different */
                    902:        if (sign1 < 0 && sign2 >= 0)
                    903:                return -1;
                    904:        if (sign1 >= 0 && sign2 < 0)
                    905:                return 1;
                    906:
                    907:        if (e1)
                    908:                return (sign2 < 0) ? +1 : -1;
                    909:        else if (e2)
                    910:                return (sign1 < 0) ? -1 : 1;
                    911:
                    912:        if (use_suffix) {
                    913:                cmp_res = cmpsuffix(SI1, SI2);
                    914:                if (cmp_res)
                    915:                        return (sign1 < 0) ? -cmp_res : cmp_res;
                    916:        }
                    917:
                    918:        /* if both numbers are empty assume that the strings are equal */
                    919:        if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
                    920:                return 0;
                    921:
                    922:        /*
                    923:         * if the main part is of different size, we know the result
                    924:         * (because the leading zeros are removed)
                    925:         */
                    926:        if (main1 < main2)
                    927:                cmp_res = -1;
                    928:        else if (main1 > main2)
                    929:                cmp_res = +1;
                    930:        /* if the sizes are equal then simple non-collate string compare gives the correct result */
                    931:        else
                    932:                cmp_res = wcscmp(smain1, smain2);
                    933:
                    934:        /* check fraction */
                    935:        if (!cmp_res)
                    936:                cmp_res = wcscmp(sfrac1, sfrac2);
                    937:
                    938:        if (!cmp_res)
                    939:                return 0;
                    940:
                    941:        /* reverse result if the signs are negative */
                    942:        if (sign1 < 0 && sign2 < 0)
                    943:                cmp_res = -cmp_res;
                    944:
                    945:        return cmp_res;
                    946: }
                    947:
                    948: /*
                    949:  * Implements numeric sort (-n).
                    950:  */
                    951: static int
                    952: numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
                    953: {
                    954:
                    955:        return numcoll_impl(kv1, kv2, offset, false);
                    956: }
                    957:
                    958: /*
                    959:  * Implements 'human' numeric sort (-h).
                    960:  */
                    961: static int
                    962: hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
                    963: {
                    964:
                    965:        return numcoll_impl(kv1, kv2, offset, true);
                    966: }
                    967:
                    968: /*
                    969:  * Implements random sort (-R).
                    970:  */
                    971: static int
                    972: randomcoll(struct key_value *kv1, struct key_value *kv2,
                    973:     size_t offset __unused)
                    974: {
                    975:        struct bwstring *s1, *s2;
                    976:        MD5_CTX ctx1, ctx2;
                    977:        char *b1, *b2;
                    978:
                    979:        s1 = kv1->k;
                    980:        s2 = kv2->k;
                    981:
                    982:        if (debug_sort) {
                    983:                bwsprintf(stdout, s1, "; k1=<", ">");
                    984:                bwsprintf(stdout, s2, ", k2=<", ">");
                    985:        }
                    986:
                    987:        if (s1 == s2)
                    988:                return 0;
                    989:
                    990:        memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
                    991:        memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
                    992:
                    993:        MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
                    994:        MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
                    995:        b1 = MD5End(&ctx1, NULL);
                    996:        b2 = MD5End(&ctx2, NULL);
                    997:        if (b1 == NULL) {
                    998:                if (b2 == NULL)
                    999:                        return 0;
                   1000:                else {
                   1001:                        sort_free(b2);
                   1002:                        return -1;
                   1003:                }
                   1004:        } else if (b2 == NULL) {
                   1005:                sort_free(b1);
                   1006:                return 1;
                   1007:        } else {
                   1008:                int cmp_res;
                   1009:
                   1010:                cmp_res = strcmp(b1, b2);
                   1011:                sort_free(b1);
                   1012:                sort_free(b2);
                   1013:
                   1014:                if (!cmp_res)
                   1015:                        cmp_res = bwscoll(s1, s2, 0);
                   1016:
                   1017:                return cmp_res;
                   1018:        }
                   1019: }
                   1020:
                   1021: /*
                   1022:  * Implements version sort (-V).
                   1023:  */
                   1024: static int
                   1025: versioncoll(struct key_value *kv1, struct key_value *kv2,
                   1026:     size_t offset __unused)
                   1027: {
                   1028:        struct bwstring *s1, *s2;
                   1029:
                   1030:        s1 = kv1->k;
                   1031:        s2 = kv2->k;
                   1032:
                   1033:        if (debug_sort) {
                   1034:                bwsprintf(stdout, s1, "; k1=<", ">");
                   1035:                bwsprintf(stdout, s2, ", k2=<", ">");
                   1036:        }
                   1037:
                   1038:        if (s1 == s2)
                   1039:                return 0;
                   1040:
                   1041:        return vcmp(s1, s2);
                   1042: }
                   1043:
                   1044: /*
                   1045:  * Check for minus infinity
                   1046:  */
                   1047: static inline bool
                   1048: huge_minus(double d, int err1)
                   1049: {
                   1050:
                   1051:        if (err1 == ERANGE)
                   1052:                if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
                   1053:                        return 1;
                   1054:
                   1055:        return 0;
                   1056: }
                   1057:
                   1058: /*
                   1059:  * Check for plus infinity
                   1060:  */
                   1061: static inline bool
                   1062: huge_plus(double d, int err1)
                   1063: {
                   1064:
                   1065:        if (err1 == ERANGE)
                   1066:                if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
                   1067:                        return 1;
                   1068:
                   1069:        return 0;
                   1070: }
                   1071:
                   1072: /*
                   1073:  * Check whether a function is a NAN
                   1074:  */
                   1075: static bool
                   1076: is_nan(double d)
                   1077: {
1.3     ! miod     1078: #if defined(NAN)
1.1       millert  1079:        return (d == NAN || isnan(d));
1.3     ! miod     1080: #else
        !          1081:        return (isnan(d));
        !          1082: #endif
1.1       millert  1083: }
                   1084:
                   1085: /*
                   1086:  * Compare two NANs
                   1087:  */
                   1088: static int
                   1089: cmp_nans(double d1, double d2)
                   1090: {
                   1091:
                   1092:        if (d1 == d2)
                   1093:                return 0;
                   1094:        return d1 < d2 ? -1 : 1;
                   1095: }
                   1096:
                   1097: /*
                   1098:  * Implements general numeric sort (-g).
                   1099:  */
                   1100: static int
                   1101: gnumcoll(struct key_value *kv1, struct key_value *kv2,
                   1102:     size_t offset __unused)
                   1103: {
                   1104:        double d1, d2;
                   1105:        int err1, err2;
                   1106:        bool empty1, empty2, key1_read, key2_read;
                   1107:
                   1108:        d1 = d2 = 0;
                   1109:        err1 = err2 = 0;
                   1110:        key1_read = key2_read = false;
                   1111:
                   1112:        if (debug_sort) {
                   1113:                bwsprintf(stdout, kv1->k, "; k1=<", ">");
                   1114:                bwsprintf(stdout, kv2->k, "; k2=<", ">");
                   1115:        }
                   1116:
                   1117:        if (kv1->hint->status == HS_UNINITIALIZED) {
                   1118:                errno = 0;
                   1119:                d1 = bwstod(kv1->k, &empty1);
                   1120:                err1 = errno;
                   1121:
                   1122:                if (empty1)
                   1123:                        kv1->hint->v.gh.notnum = true;
                   1124:                else if (err1 == 0) {
                   1125:                        kv1->hint->v.gh.d = d1;
                   1126:                        kv1->hint->v.gh.nan = is_nan(d1);
                   1127:                        kv1->hint->status = HS_INITIALIZED;
                   1128:                } else
                   1129:                        kv1->hint->status = HS_ERROR;
                   1130:
                   1131:                key1_read = true;
                   1132:        }
                   1133:
                   1134:        if (kv2->hint->status == HS_UNINITIALIZED) {
                   1135:                errno = 0;
                   1136:                d2 = bwstod(kv2->k, &empty2);
                   1137:                err2 = errno;
                   1138:
                   1139:                if (empty2)
                   1140:                        kv2->hint->v.gh.notnum = true;
                   1141:                else if (err2 == 0) {
                   1142:                        kv2->hint->v.gh.d = d2;
                   1143:                        kv2->hint->v.gh.nan = is_nan(d2);
                   1144:                        kv2->hint->status = HS_INITIALIZED;
                   1145:                } else
                   1146:                        kv2->hint->status = HS_ERROR;
                   1147:
                   1148:                key2_read = true;
                   1149:        }
                   1150:
                   1151:        if (kv1->hint->status == HS_INITIALIZED &&
                   1152:            kv2->hint->status == HS_INITIALIZED) {
                   1153:                if (kv1->hint->v.gh.notnum)
                   1154:                        return kv2->hint->v.gh.notnum ? 0 : -1;
                   1155:                else if (kv2->hint->v.gh.notnum)
                   1156:                        return 1;
                   1157:
                   1158:                if (kv1->hint->v.gh.nan)
                   1159:                        return kv2->hint->v.gh.nan ?
                   1160:                            cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
                   1161:                else if (kv2->hint->v.gh.nan)
                   1162:                        return 1;
                   1163:
                   1164:                d1 = kv1->hint->v.gh.d;
                   1165:                d2 = kv2->hint->v.gh.d;
                   1166:
                   1167:                if (d1 < d2)
                   1168:                        return -1;
                   1169:                else if (d1 > d2)
                   1170:                        return 1;
                   1171:                else
                   1172:                        return 0;
                   1173:        }
                   1174:
                   1175:        if (!key1_read) {
                   1176:                errno = 0;
                   1177:                d1 = bwstod(kv1->k, &empty1);
                   1178:                err1 = errno;
                   1179:        }
                   1180:
                   1181:        if (!key2_read) {
                   1182:                errno = 0;
                   1183:                d2 = bwstod(kv2->k, &empty2);
                   1184:                err2 = errno;
                   1185:        }
                   1186:
                   1187:        /* Non-value case: */
                   1188:        if (empty1)
                   1189:                return empty2 ? 0 : -1;
                   1190:        else if (empty2)
                   1191:                return 1;
                   1192:
                   1193:        /* NAN case */
                   1194:        if (is_nan(d1))
                   1195:                return is_nan(d2) ? cmp_nans(d1, d2) : -1;
                   1196:        else if (is_nan(d2))
                   1197:                return 1;
                   1198:
                   1199:        /* Infinities */
                   1200:        if (err1 == ERANGE || err2 == ERANGE) {
                   1201:                /* Minus infinity case */
                   1202:                if (huge_minus(d1, err1)) {
                   1203:                        if (huge_minus(d2, err2)) {
                   1204:                                if (d1 == d2)
                   1205:                                        return 0;
                   1206:                                return d1 < d2 ? -1 : 1;
                   1207:                        } else
                   1208:                                return -1;
                   1209:
                   1210:                } else if (huge_minus(d2, err2)) {
                   1211:                        if (huge_minus(d1, err1)) {
                   1212:                                if (d1 == d2)
                   1213:                                        return 0;
                   1214:                                return d1 < d2 ? -1 : 1;
                   1215:                        } else
                   1216:                                return 1;
                   1217:                }
                   1218:
                   1219:                /* Plus infinity case */
                   1220:                if (huge_plus(d1, err1)) {
                   1221:                        if (huge_plus(d2, err2)) {
                   1222:                                if (d1 == d2)
                   1223:                                        return 0;
                   1224:                                return d1 < d2 ? -1 : 1;
                   1225:                        } else
                   1226:                                return 1;
                   1227:                } else if (huge_plus(d2, err2)) {
                   1228:                        if (huge_plus(d1, err1)) {
                   1229:                                if (d1 == d2)
                   1230:                                        return 0;
                   1231:                                return d1 < d2 ? -1 : 1;
                   1232:                        } else
                   1233:                                return -1;
                   1234:                }
                   1235:        }
                   1236:
                   1237:        if (d1 == d2)
                   1238:                return 0;
                   1239:        return d1 < d2 ? -1 : 1;
                   1240: }
                   1241:
                   1242: /*
                   1243:  * Implements month sort (-M).
                   1244:  */
                   1245: static int
                   1246: monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
                   1247: {
                   1248:        int val1, val2;
                   1249:        bool key1_read, key2_read;
                   1250:
                   1251:        val1 = val2 = 0;
                   1252:        key1_read = key2_read = false;
                   1253:
                   1254:        if (debug_sort) {
                   1255:                bwsprintf(stdout, kv1->k, "; k1=<", ">");
                   1256:                bwsprintf(stdout, kv2->k, "; k2=<", ">");
                   1257:        }
                   1258:
                   1259:        if (kv1->hint->status == HS_UNINITIALIZED) {
                   1260:                kv1->hint->v.Mh.m = bws_month_score(kv1->k);
                   1261:                key1_read = true;
                   1262:                kv1->hint->status = HS_INITIALIZED;
                   1263:        }
                   1264:
                   1265:        if (kv2->hint->status == HS_UNINITIALIZED) {
                   1266:                kv2->hint->v.Mh.m = bws_month_score(kv2->k);
                   1267:                key2_read = true;
                   1268:                kv2->hint->status = HS_INITIALIZED;
                   1269:        }
                   1270:
                   1271:        if (kv1->hint->status == HS_INITIALIZED) {
                   1272:                val1 = kv1->hint->v.Mh.m;
                   1273:                key1_read = true;
                   1274:        }
                   1275:
                   1276:        if (kv2->hint->status == HS_INITIALIZED) {
                   1277:                val2 = kv2->hint->v.Mh.m;
                   1278:                key2_read = true;
                   1279:        }
                   1280:
                   1281:        if (!key1_read)
                   1282:                val1 = bws_month_score(kv1->k);
                   1283:        if (!key2_read)
                   1284:                val2 = bws_month_score(kv2->k);
                   1285:
                   1286:        if (val1 == val2)
                   1287:                return 0;
                   1288:        return val1 < val2 ? -1 : 1;
                   1289: }