src/usr.bin/sort/coll.c - annotate

Return to coll.c CVS log
Up to [local] / src / usr.bin / sort
Annotation of src/usr.bin/sort/coll.c, Revision 1.7

1.7     ! tobias      1: /*     $OpenBSD: coll.c,v 1.6 2015/04/01 21:47:19 millert Exp $        */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
                      5:  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29:
                     30: #include <sys/types.h>
                     31:
                     32: #include <errno.h>
                     33: #include <err.h>
                     34: #include <langinfo.h>
                     35: #include <limits.h>
                     36: #include <math.h>
                     37: #include <md5.h>
                     38: #include <stdlib.h>
                     39: #include <string.h>
                     40: #include <wchar.h>
                     41: #include <wctype.h>
                     42:
                     43: #include "coll.h"
                     44: #include "vsort.h"
                     45:
                     46: struct key_specs *keys;
                     47: size_t keys_num = 0;
                     48:
                     49: wint_t symbol_decimal_point = L'.';
                     50: /* there is no default thousands separator in collate rules: */
                     51: wint_t symbol_thousands_sep = 0;
                     52: wint_t symbol_negative_sign = L'-';
                     53: wint_t symbol_positive_sign = L'+';
                     54:
                     55: static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
                     56: static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
                     57: static int monthcoll(struct key_value*, struct key_value *, size_t offset);
                     58: static int numcoll(struct key_value*, struct key_value *, size_t offset);
                     59: static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
                     60: static int randomcoll(struct key_value*, struct key_value *, size_t offset);
                     61: static int versioncoll(struct key_value*, struct key_value *, size_t offset);
                     62:
                     63: /*
                     64:  * Allocate keys array
                     65:  */
                     66: struct keys_array *
                     67: keys_array_alloc(void)
                     68: {
                     69:        struct keys_array *ka;
                     70:        size_t sz;
                     71:
                     72:        sz = keys_array_size();
1.2       millert    73:        ka = sort_calloc(1, sz);
1.1       millert    74:
                     75:        return ka;
                     76: }
                     77:
                     78: /*
                     79:  * Calculate whether we need key hint space
                     80:  */
                     81: static size_t
                     82: key_hint_size(void)
                     83: {
                     84:        return need_hint ? sizeof(struct key_hint) : 0;
                     85: }
                     86:
                     87: /*
                     88:  * Calculate keys array size
                     89:  */
                     90: size_t
                     91: keys_array_size(void)
                     92: {
                     93:        return keys_num * (sizeof(struct key_value) + key_hint_size());
                     94: }
                     95:
                     96: /*
                     97:  * Clean data of keys array
                     98:  */
                     99: void
                    100: clean_keys_array(const struct bwstring *s, struct keys_array *ka)
                    101: {
                    102:        if (ka) {
                    103:                size_t i;
                    104:
                    105:                for (i = 0; i < keys_num; ++i)
                    106:                        if (ka->key[i].k && ka->key[i].k != s)
                    107:                                bwsfree(ka->key[i].k);
                    108:                memset(ka, 0, keys_array_size());
                    109:        }
                    110: }
                    111:
                    112: /*
                    113:  * Set value of a key in the keys set
                    114:  */
                    115: void
                    116: set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
                    117: {
                    118:        if (ka && keys_num > ind) {
                    119:                struct key_value *kv;
                    120:
                    121:                kv = &(ka->key[ind]);
                    122:
1.7     ! tobias    123:                if (kv->k != s)
1.1       millert   124:                        bwsfree(kv->k);
                    125:                kv->k = s;
                    126:        }
                    127: }
                    128:
                    129: /*
                    130:  * Initialize a sort list item
                    131:  */
                    132: struct sort_list_item *
                    133: sort_list_item_alloc(void)
                    134: {
                    135:        struct sort_list_item *si;
                    136:        size_t sz;
                    137:
                    138:        sz = sizeof(struct sort_list_item) + keys_array_size();
1.2       millert   139:        si = sort_calloc(1, sz);
1.1       millert   140:
                    141:        return si;
                    142: }
                    143:
                    144: size_t
                    145: sort_list_item_size(struct sort_list_item *si)
                    146: {
                    147:        size_t i, ret = 0;
                    148:
                    149:        if (si) {
                    150:                ret = sizeof(struct sort_list_item) + keys_array_size();
                    151:                if (si->str)
                    152:                        ret += bws_memsize(si->str);
                    153:                for (i = 0; i < keys_num; ++i) {
                    154:                        struct key_value *kv;
                    155:
                    156:                        kv = &(si->ka.key[i]);
                    157:
                    158:                        if (kv->k != si->str)
                    159:                                ret += bws_memsize(kv->k);
                    160:                }
                    161:        }
                    162:        return ret;
                    163: }
                    164:
                    165: /*
                    166:  * Calculate key for a sort list item
                    167:  */
                    168: static void
                    169: sort_list_item_make_key(struct sort_list_item *si)
                    170: {
                    171:        preproc(si->str, &(si->ka));
                    172: }
                    173:
                    174: /*
                    175:  * Set value of a sort list item.
                    176:  * Return combined string and keys memory size.
                    177:  */
                    178: void
                    179: sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
                    180: {
                    181:        if (si) {
                    182:                clean_keys_array(si->str, &(si->ka));
                    183:                if (si->str) {
                    184:                        if (si->str == str) {
                    185:                                /* we are trying to reset the same string */
                    186:                                return;
                    187:                        } else {
                    188:                                bwsfree(si->str);
                    189:                                si->str = NULL;
                    190:                        }
                    191:                }
                    192:                si->str = str;
                    193:                sort_list_item_make_key(si);
                    194:        }
                    195: }
                    196:
                    197: /*
                    198:  * De-allocate a sort list item object memory
                    199:  */
                    200: void
                    201: sort_list_item_clean(struct sort_list_item *si)
                    202: {
                    203:        if (si) {
                    204:                clean_keys_array(si->str, &(si->ka));
                    205:                if (si->str) {
                    206:                        bwsfree(si->str);
                    207:                        si->str = NULL;
                    208:                }
                    209:        }
                    210: }
                    211:
                    212: /*
                    213:  * Skip columns according to specs
                    214:  */
                    215: static size_t
                    216: skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
                    217:     bool skip_blanks, bool *empty_key)
                    218: {
                    219:        if (cols < 1)
                    220:                return BWSLEN(s) + 1;
                    221:
                    222:        if (skip_blanks)
                    223:                while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
                    224:                        ++start;
                    225:
                    226:        while (start < BWSLEN(s) && cols > 1) {
                    227:                --cols;
                    228:                ++start;
                    229:        }
                    230:
                    231:        if (start >= BWSLEN(s))
                    232:                *empty_key = true;
                    233:
                    234:        return start;
                    235: }
                    236:
                    237: /*
                    238:  * Skip fields according to specs
                    239:  */
                    240: static size_t
                    241: skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
                    242: {
                    243:        if (fields < 2) {
                    244:                if (BWSLEN(s) == 0)
                    245:                        *empty_field = true;
                    246:                return 0;
                    247:        } else if (!(sort_opts_vals.tflag)) {
                    248:                size_t cpos = 0;
                    249:                bool pb = true;
                    250:
                    251:                while (cpos < BWSLEN(s)) {
                    252:                        bool isblank;
                    253:
                    254:                        isblank = iswblank(BWS_GET(s, cpos));
                    255:
                    256:                        if (isblank && !pb) {
                    257:                                --fields;
                    258:                                if (fields <= 1)
                    259:                                        return cpos;
                    260:                        }
                    261:                        pb = isblank;
                    262:                        ++cpos;
                    263:                }
                    264:                if (fields > 1)
                    265:                        *empty_field = true;
                    266:                return cpos;
                    267:        } else {
                    268:                size_t cpos = 0;
                    269:
                    270:                while (cpos < BWSLEN(s)) {
                    271:                        if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
                    272:                                --fields;
                    273:                                if (fields <= 1)
                    274:                                        return cpos + 1;
                    275:                        }
                    276:                        ++cpos;
                    277:                }
                    278:                if (fields > 1)
                    279:                        *empty_field = true;
                    280:                return cpos;
                    281:        }
                    282: }
                    283:
                    284: /*
                    285:  * Find fields start
                    286:  */
                    287: static void
                    288: find_field_start(const struct bwstring *s, struct key_specs *ks,
                    289:     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
                    290: {
                    291:        *field_start = skip_fields_to_start(s, ks->f1, empty_field);
                    292:        if (!*empty_field)
                    293:                *key_start = skip_cols_to_start(s, ks->c1, *field_start,
                    294:                    ks->pos1b, empty_key);
                    295:        else
                    296:                *empty_key = true;
                    297: }
                    298:
                    299: /*
                    300:  * Find end key position
                    301:  */
                    302: static size_t
                    303: find_field_end(const struct bwstring *s, struct key_specs *ks)
                    304: {
                    305:        size_t f2, next_field_start, pos_end;
                    306:        bool empty_field, empty_key;
                    307:
                    308:        empty_field = false;
                    309:        empty_key = false;
                    310:        f2 = ks->f2;
                    311:
                    312:        if (f2 == 0)
                    313:                return BWSLEN(s) + 1;
                    314:        else {
                    315:                if (ks->c2 == 0) {
                    316:                        next_field_start = skip_fields_to_start(s, f2 + 1,
                    317:                            &empty_field);
                    318:                        if ((next_field_start > 0) && sort_opts_vals.tflag &&
                    319:                            ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
                    320:                            next_field_start - 1)))
                    321:                                --next_field_start;
                    322:                } else
                    323:                        next_field_start = skip_fields_to_start(s, f2,
                    324:                            &empty_field);
                    325:        }
                    326:
                    327:        if (empty_field || (next_field_start >= BWSLEN(s)))
                    328:                return BWSLEN(s) + 1;
                    329:
                    330:        if (ks->c2) {
                    331:                pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
                    332:                    ks->pos2b, &empty_key);
                    333:                if (pos_end < BWSLEN(s))
                    334:                        ++pos_end;
                    335:        } else
                    336:                pos_end = next_field_start;
                    337:
                    338:        return pos_end;
                    339: }
                    340:
                    341: /*
                    342:  * Cut a field according to the key specs
                    343:  */
                    344: static struct bwstring *
                    345: cut_field(const struct bwstring *s, struct key_specs *ks)
                    346: {
                    347:        struct bwstring *ret = NULL;
                    348:
                    349:        if (s && ks) {
                    350:                size_t field_start, key_end, key_start, sz;
                    351:                bool empty_field, empty_key;
                    352:
                    353:                field_start = 0;
                    354:                key_start = 0;
                    355:                empty_field = false;
                    356:                empty_key = false;
                    357:
                    358:                find_field_start(s, ks, &field_start, &key_start,
                    359:                    &empty_field, &empty_key);
                    360:
                    361:                if (empty_key)
                    362:                        sz = 0;
                    363:                else {
                    364:                        key_end = find_field_end(s, ks);
                    365:                        sz = (key_end < key_start) ? 0 : (key_end - key_start);
                    366:                }
                    367:
                    368:                ret = bwsalloc(sz);
                    369:                if (sz)
                    370:                        bwsnocpy(ret, s, key_start, sz);
                    371:        } else
                    372:                ret = bwsalloc(0);
                    373:
                    374:        return ret;
                    375: }
                    376:
                    377: /*
                    378:  * Preprocesses a line applying the necessary transformations
                    379:  * specified by command line options and returns the preprocessed
                    380:  * string, which can be used to compare.
                    381:  */
                    382: int
                    383: preproc(struct bwstring *s, struct keys_array *ka)
                    384: {
                    385:        if (sort_opts_vals.kflag) {
                    386:                size_t i;
                    387:                for (i = 0; i < keys_num; i++) {
                    388:                        struct bwstring *key;
                    389:                        struct key_specs *kspecs;
                    390:                        struct sort_mods *sm;
                    391:
                    392:                        kspecs = &(keys[i]);
                    393:                        key = cut_field(s, kspecs);
                    394:
                    395:                        sm = &(kspecs->sm);
                    396:                        if (sm->dflag)
                    397:                                key = dictionary_order(key);
                    398:                        else if (sm->iflag)
                    399:                                key = ignore_nonprinting(key);
                    400:                        if (sm->fflag || sm->Mflag)
                    401:                                key = ignore_case(key);
                    402:
                    403:                        set_key_on_keys_array(ka, key, i);
                    404:                }
                    405:        } else {
                    406:                struct bwstring *ret = NULL;
                    407:                struct sort_mods *sm = default_sort_mods;
                    408:
                    409:                if (sm->bflag) {
                    410:                        if (ret == NULL)
                    411:                                ret = bwsdup(s);
                    412:                        ret = ignore_leading_blanks(ret);
                    413:                }
                    414:                if (sm->dflag) {
                    415:                        if (ret == NULL)
                    416:                                ret = bwsdup(s);
                    417:                        ret = dictionary_order(ret);
                    418:                } else if (sm->iflag) {
                    419:                        if (ret == NULL)
                    420:                                ret = bwsdup(s);
                    421:                        ret = ignore_nonprinting(ret);
                    422:                }
                    423:                if (sm->fflag || sm->Mflag) {
                    424:                        if (ret == NULL)
                    425:                                ret = bwsdup(s);
                    426:                        ret = ignore_case(ret);
                    427:                }
                    428:                if (ret == NULL)
                    429:                        set_key_on_keys_array(ka, s, 0);
                    430:                else
                    431:                        set_key_on_keys_array(ka, ret, 0);
                    432:        }
                    433:
                    434:        return 0;
                    435: }
                    436:
                    437: cmpcoll_t
                    438: get_sort_func(struct sort_mods *sm)
                    439: {
                    440:        if (sm->nflag)
                    441:                return numcoll;
                    442:        else if (sm->hflag)
                    443:                return hnumcoll;
                    444:        else if (sm->gflag)
                    445:                return gnumcoll;
                    446:        else if (sm->Mflag)
                    447:                return monthcoll;
                    448:        else if (sm->Rflag)
                    449:                return randomcoll;
                    450:        else if (sm->Vflag)
                    451:                return versioncoll;
                    452:        else
                    453:                return wstrcoll;
                    454: }
                    455:
                    456: /*
                    457:  * Compares the given strings.  Returns a positive number if
                    458:  * the first precedes the second, a negative number if the second is
                    459:  * the preceding one, and zero if they are equal.  This function calls
                    460:  * the underlying collate functions, which done the actual comparison.
                    461:  */
                    462: int
                    463: key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
                    464: {
                    465:        struct sort_mods *sm;
                    466:        int res = 0;
                    467:        size_t i;
                    468:
                    469:        for (i = 0; i < keys_num; ++i) {
                    470:                sm = &(keys[i].sm);
                    471:
                    472:                if (sm->rflag)
                    473:                        res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
                    474:                else
                    475:                        res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
                    476:
                    477:                if (res)
                    478:                        break;
                    479:
                    480:                /* offset applies to only the first key */
                    481:                offset = 0;
                    482:        }
                    483:        return res;
                    484: }
                    485:
                    486: /*
                    487:  * Compare two strings.
                    488:  * Plain symbol-by-symbol comparison.
                    489:  */
                    490: int
                    491: top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
                    492: {
                    493:        if (default_sort_mods->rflag) {
                    494:                const struct bwstring *tmp;
                    495:
                    496:                tmp = s1;
                    497:                s1 = s2;
                    498:                s2 = tmp;
                    499:        }
                    500:
                    501:        return bwscoll(s1, s2, 0);
                    502: }
                    503:
                    504: /*
                    505:  * Compare a string and a sort list item, according to the sort specs.
                    506:  */
                    507: int
                    508: str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
                    509: {
                    510:        struct keys_array *ka1;
                    511:        int ret = 0;
                    512:
                    513:        ka1 = keys_array_alloc();
                    514:
                    515:        preproc(str1, ka1);
                    516:
                    517:        sort_list_item_make_key(*ss2);
                    518:
                    519:        if (debug_sort) {
                    520:                bwsprintf(stdout, str1, "; s1=<", ">");
                    521:                bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
                    522:        }
                    523:
                    524:        ret = key_coll(ka1, &((*ss2)->ka), 0);
                    525:
                    526:        if (debug_sort)
                    527:                printf("; cmp1=%d", ret);
                    528:
                    529:        clean_keys_array(str1, ka1);
                    530:        sort_free(ka1);
                    531:
                    532:        if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
                    533:                ret = top_level_str_coll(str1, ((*ss2)->str));
                    534:                if (debug_sort)
                    535:                        printf("; cmp2=%d", ret);
                    536:        }
                    537:
                    538:        if (debug_sort)
                    539:                putchar('\n');
                    540:
                    541:        return ret;
                    542: }
                    543:
                    544: /*
                    545:  * Compare two sort list items, according to the sort specs.
                    546:  */
                    547: int
                    548: list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
                    549:     size_t offset)
                    550: {
                    551:        int ret;
                    552:
                    553:        ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
                    554:
                    555:        if (debug_sort) {
                    556:                if (offset)
                    557:                        printf("; offset=%d", (int) offset);
                    558:                bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
                    559:                bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
                    560:                printf("; cmp1=%d\n", ret);
                    561:        }
                    562:
                    563:        if (ret)
                    564:                return ret;
                    565:
                    566:        if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
                    567:                ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
                    568:                if (debug_sort)
                    569:                        printf("; cmp2=%d\n", ret);
                    570:        }
                    571:
                    572:        return ret;
                    573: }
                    574:
                    575: /*
                    576:  * Compare two sort list items, according to the sort specs.
                    577:  */
                    578: int
                    579: list_coll(const void *ss1, const void *ss2)
                    580: {
                    581:        return list_coll_offset((struct sort_list_item **)ss1,
                    582:            (struct sort_list_item **)ss2, 0);
                    583: }
                    584:
                    585: #define LSCDEF(N)                                                                                      \
                    586: static int                                                                                             \
                    587: list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)                                        \
                    588: {                                                                                                      \
                    589:                                                                                                        \
                    590:        return list_coll_offset(ss1, ss2, N);                                                           \
                    591: }
                    592:
                    593: LSCDEF(0)
                    594: LSCDEF(1)
                    595: LSCDEF(2)
                    596: LSCDEF(3)
                    597: LSCDEF(4)
                    598: LSCDEF(5)
                    599: LSCDEF(6)
                    600: LSCDEF(7)
                    601: LSCDEF(8)
                    602: LSCDEF(9)
                    603: LSCDEF(10)
                    604: LSCDEF(11)
                    605: LSCDEF(12)
                    606: LSCDEF(13)
                    607: LSCDEF(14)
                    608: LSCDEF(15)
                    609: LSCDEF(16)
                    610: LSCDEF(17)
                    611: LSCDEF(18)
                    612: LSCDEF(19)
                    613: LSCDEF(20)
                    614:
                    615: listcoll_t
                    616: get_list_call_func(size_t offset)
                    617: {
                    618:        static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
                    619:            list_coll_2, list_coll_3, list_coll_4, list_coll_5,
                    620:            list_coll_6, list_coll_7, list_coll_8, list_coll_9,
                    621:            list_coll_10, list_coll_11, list_coll_12, list_coll_13,
                    622:            list_coll_14, list_coll_15, list_coll_16, list_coll_17,
                    623:            list_coll_18, list_coll_19, list_coll_20 };
                    624:
                    625:        if (offset <= 20)
                    626:                return lsarray[offset];
                    627:
                    628:        return list_coll_0;
                    629: }
                    630:
                    631: /*
                    632:  * Compare two sort list items, only by their original string.
                    633:  */
                    634: int
                    635: list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
                    636: {
                    637:        return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
                    638: }
                    639:
                    640: /*
                    641:  * Maximum size of a number in the string (before or after decimal point)
                    642:  */
                    643: #define MAX_NUM_SIZE (128)
                    644:
                    645: /*
                    646:  * Set suffix value
                    647:  */
                    648: static void
                    649: setsuffix(wchar_t c, unsigned char *si)
                    650: {
                    651:        switch (c){
                    652:        case L'k':
                    653:        case L'K':
                    654:                *si = 1;
                    655:                break;
                    656:        case L'M':
                    657:                *si = 2;
                    658:                break;
                    659:        case L'G':
                    660:                *si = 3;
                    661:                break;
                    662:        case L'T':
                    663:                *si = 4;
                    664:                break;
                    665:        case L'P':
                    666:                *si = 5;
                    667:                break;
                    668:        case L'E':
                    669:                *si = 6;
                    670:                break;
                    671:        case L'Z':
                    672:                *si = 7;
                    673:                break;
                    674:        case L'Y':
                    675:                *si = 8;
                    676:                break;
                    677:        default:
                    678:                *si = 0;
                    679:        };
                    680: }
                    681:
                    682: /*
                    683:  * Read string s and parse the string into a fixed-decimal-point number.
                    684:  * sign equals -1 if the number is negative (explicit plus is not allowed,
                    685:  * according to GNU sort's "info sort".
                    686:  * The number part before decimal point is in the smain, after the decimal
                    687:  * point is in sfrac, tail is the pointer to the remainder of the string.
                    688:  */
                    689: static int
                    690: read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
                    691: {
                    692:        bwstring_iterator s;
                    693:
                    694:        s = bws_begin(s0);
                    695:
                    696:        /* always end the fraction with zero, even if we have no fraction */
                    697:        sfrac[0] = 0;
                    698:
                    699:        while (iswblank(bws_get_iter_value(s)))
                    700:                s = bws_iterator_inc(s, 1);
                    701:
                    702:        if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
                    703:                *sign = -1;
                    704:                s = bws_iterator_inc(s, 1);
                    705:        }
                    706:
                    707:        // This is '0', not '\0', do not change this
                    708:        while (iswdigit(bws_get_iter_value(s)) &&
                    709:            (bws_get_iter_value(s) == L'0'))
                    710:                s = bws_iterator_inc(s, 1);
                    711:
                    712:        while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
                    713:                if (iswdigit(bws_get_iter_value(s))) {
                    714:                        smain[*main_len] = bws_get_iter_value(s);
                    715:                        s = bws_iterator_inc(s, 1);
                    716:                        *main_len += 1;
                    717:                } else if (symbol_thousands_sep &&
                    718:                    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
                    719:                        s = bws_iterator_inc(s, 1);
                    720:                else
                    721:                        break;
                    722:        }
                    723:
                    724:        smain[*main_len] = 0;
                    725:
                    726:        if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
                    727:                s = bws_iterator_inc(s, 1);
                    728:                while (iswdigit(bws_get_iter_value(s)) &&
                    729:                    *frac_len < MAX_NUM_SIZE) {
                    730:                        sfrac[*frac_len] = bws_get_iter_value(s);
                    731:                        s = bws_iterator_inc(s, 1);
                    732:                        *frac_len += 1;
                    733:                }
                    734:                sfrac[*frac_len] = 0;
                    735:
                    736:                while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
                    737:                        --(*frac_len);
                    738:                        sfrac[*frac_len] = L'\0';
                    739:                }
                    740:        }
                    741:
                    742:        setsuffix(bws_get_iter_value(s), si);
                    743:
                    744:        if ((*main_len + *frac_len) == 0)
                    745:                *sign = 0;
                    746:
                    747:        return 0;
                    748: }
                    749:
                    750: /*
                    751:  * Implements string sort.
                    752:  */
                    753: static int
                    754: wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
                    755: {
                    756:
                    757:        if (debug_sort) {
                    758:                if (offset)
                    759:                        printf("; offset=%d\n", (int) offset);
                    760:                bwsprintf(stdout, kv1->k, "; k1=<", ">");
                    761:                printf("(%zu)", BWSLEN(kv1->k));
                    762:                bwsprintf(stdout, kv2->k, ", k2=<", ">");
                    763:                printf("(%zu)", BWSLEN(kv2->k));
                    764:        }
                    765:
                    766:        return bwscoll(kv1->k, kv2->k, offset);
                    767: }
                    768:
                    769: /*
                    770:  * Compare two suffixes
                    771:  */
                    772: static inline int
                    773: cmpsuffix(unsigned char si1, unsigned char si2)
                    774: {
                    775:        return (char)si1 - (char)si2;
                    776: }
                    777:
                    778: /*
                    779:  * Implements numeric sort for -n and -h.
                    780:  */
                    781: static int
                    782: numcoll_impl(struct key_value *kv1, struct key_value *kv2,
                    783:     size_t offset __unused, bool use_suffix)
                    784: {
                    785:        struct bwstring *s1, *s2;
                    786:        wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
                    787:        wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
                    788:        int cmp_res, sign1, sign2;
                    789:        size_t frac1, frac2, main1, main2;
                    790:        unsigned char SI1, SI2;
                    791:        bool e1, e2, key1_read, key2_read;
                    792:
                    793:        s1 = kv1->k;
                    794:        s2 = kv2->k;
                    795:        sign1 = sign2 = 0;
                    796:        main1 = main2 = 0;
                    797:        frac1 = frac2 = 0;
                    798:
                    799:        key1_read = key2_read = false;
                    800:
                    801:        if (debug_sort) {
                    802:                bwsprintf(stdout, s1, "; k1=<", ">");
                    803:                bwsprintf(stdout, s2, ", k2=<", ">");
                    804:        }
                    805:
                    806:        if (s1 == s2)
                    807:                return 0;
                    808:
                    809:        if (kv1->hint->status == HS_UNINITIALIZED) {
                    810:                /* read the number from the string */
                    811:                read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
                    812:                key1_read = true;
                    813:                kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
                    814:                if (main1 < 1 && frac1 < 1)
                    815:                        kv1->hint->v.nh.empty=true;
                    816:                kv1->hint->v.nh.si = SI1;
                    817:                kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
                    818:                    HS_INITIALIZED : HS_ERROR;
                    819:                kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
                    820:        }
                    821:
                    822:        if (kv2->hint->status == HS_UNINITIALIZED) {
                    823:                /* read the number from the string */
                    824:                read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
                    825:                key2_read = true;
                    826:                kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
                    827:                if (main2 < 1 && frac2 < 1)
                    828:                        kv2->hint->v.nh.empty=true;
                    829:                kv2->hint->v.nh.si = SI2;
                    830:                kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
                    831:                    HS_INITIALIZED : HS_ERROR;
                    832:                kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
                    833:        }
                    834:
                    835:        if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
                    836:            HS_INITIALIZED) {
                    837:                unsigned long long n1, n2;
                    838:                bool neg1, neg2;
                    839:
                    840:                e1 = kv1->hint->v.nh.empty;
                    841:                e2 = kv2->hint->v.nh.empty;
                    842:
                    843:                if (e1 && e2)
                    844:                        return 0;
                    845:
                    846:                neg1 = kv1->hint->v.nh.neg;
                    847:                neg2 = kv2->hint->v.nh.neg;
                    848:
                    849:                if (neg1 && !neg2)
                    850:                        return -1;
                    851:                if (neg2 && !neg1)
                    852:                        return 1;
                    853:
                    854:                if (e1)
                    855:                        return neg2 ? 1 : -1;
                    856:                else if (e2)
                    857:                        return neg1 ? -1 : 1;
                    858:
                    859:
                    860:                if (use_suffix) {
                    861:                        cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
                    862:                        if (cmp_res)
                    863:                                return neg1 ? -cmp_res : cmp_res;
                    864:                }
                    865:
                    866:                n1 = kv1->hint->v.nh.n1;
                    867:                n2 = kv2->hint->v.nh.n1;
                    868:                if (n1 < n2)
                    869:                        return neg1 ? 1 : -1;
                    870:                else if (n1 > n2)
                    871:                        return neg1 ? -1 : 1;
                    872:        }
                    873:
                    874:        /* read the numbers from the strings */
                    875:        if (!key1_read)
                    876:                read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
                    877:        if (!key2_read)
                    878:                read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
                    879:
                    880:        e1 = ((main1 + frac1) == 0);
                    881:        e2 = ((main2 + frac2) == 0);
                    882:
                    883:        if (e1 && e2)
                    884:                return 0;
                    885:
                    886:        /* we know the result if the signs are different */
                    887:        if (sign1 < 0 && sign2 >= 0)
                    888:                return -1;
                    889:        if (sign1 >= 0 && sign2 < 0)
                    890:                return 1;
                    891:
                    892:        if (e1)
                    893:                return (sign2 < 0) ? +1 : -1;
                    894:        else if (e2)
                    895:                return (sign1 < 0) ? -1 : 1;
                    896:
                    897:        if (use_suffix) {
                    898:                cmp_res = cmpsuffix(SI1, SI2);
                    899:                if (cmp_res)
                    900:                        return (sign1 < 0) ? -cmp_res : cmp_res;
                    901:        }
                    902:
                    903:        /* if both numbers are empty assume that the strings are equal */
                    904:        if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
                    905:                return 0;
                    906:
                    907:        /*
                    908:         * if the main part is of different size, we know the result
                    909:         * (because the leading zeros are removed)
                    910:         */
                    911:        if (main1 < main2)
                    912:                cmp_res = -1;
                    913:        else if (main1 > main2)
                    914:                cmp_res = +1;
                    915:        /* if the sizes are equal then simple non-collate string compare gives the correct result */
                    916:        else
                    917:                cmp_res = wcscmp(smain1, smain2);
                    918:
                    919:        /* check fraction */
                    920:        if (!cmp_res)
                    921:                cmp_res = wcscmp(sfrac1, sfrac2);
                    922:
                    923:        if (!cmp_res)
                    924:                return 0;
                    925:
                    926:        /* reverse result if the signs are negative */
                    927:        if (sign1 < 0 && sign2 < 0)
                    928:                cmp_res = -cmp_res;
                    929:
                    930:        return cmp_res;
                    931: }
                    932:
                    933: /*
                    934:  * Implements numeric sort (-n).
                    935:  */
                    936: static int
                    937: numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
                    938: {
                    939:        return numcoll_impl(kv1, kv2, offset, false);
                    940: }
                    941:
                    942: /*
                    943:  * Implements 'human' numeric sort (-h).
                    944:  */
                    945: static int
                    946: hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
                    947: {
                    948:        return numcoll_impl(kv1, kv2, offset, true);
                    949: }
                    950:
                    951: /*
                    952:  * Implements random sort (-R).
                    953:  */
                    954: static int
                    955: randomcoll(struct key_value *kv1, struct key_value *kv2,
                    956:     size_t offset __unused)
                    957: {
                    958:        struct bwstring *s1, *s2;
                    959:        MD5_CTX ctx1, ctx2;
                    960:        char *b1, *b2;
                    961:
                    962:        s1 = kv1->k;
                    963:        s2 = kv2->k;
                    964:
                    965:        if (debug_sort) {
                    966:                bwsprintf(stdout, s1, "; k1=<", ">");
                    967:                bwsprintf(stdout, s2, ", k2=<", ">");
                    968:        }
                    969:
                    970:        if (s1 == s2)
                    971:                return 0;
                    972:
                    973:        memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
                    974:        memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
                    975:
                    976:        MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
                    977:        MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
                    978:        b1 = MD5End(&ctx1, NULL);
                    979:        b2 = MD5End(&ctx2, NULL);
                    980:        if (b1 == NULL) {
                    981:                if (b2 == NULL)
                    982:                        return 0;
                    983:                else {
                    984:                        sort_free(b2);
                    985:                        return -1;
                    986:                }
                    987:        } else if (b2 == NULL) {
                    988:                sort_free(b1);
                    989:                return 1;
                    990:        } else {
                    991:                int cmp_res;
                    992:
                    993:                cmp_res = strcmp(b1, b2);
                    994:                sort_free(b1);
                    995:                sort_free(b2);
                    996:
                    997:                if (!cmp_res)
                    998:                        cmp_res = bwscoll(s1, s2, 0);
                    999:
                   1000:                return cmp_res;
                   1001:        }
                   1002: }
                   1003:
                   1004: /*
                   1005:  * Implements version sort (-V).
                   1006:  */
                   1007: static int
                   1008: versioncoll(struct key_value *kv1, struct key_value *kv2,
                   1009:     size_t offset __unused)
                   1010: {
                   1011:        struct bwstring *s1, *s2;
                   1012:
                   1013:        s1 = kv1->k;
                   1014:        s2 = kv2->k;
                   1015:
                   1016:        if (debug_sort) {
                   1017:                bwsprintf(stdout, s1, "; k1=<", ">");
                   1018:                bwsprintf(stdout, s2, ", k2=<", ">");
                   1019:        }
                   1020:
                   1021:        if (s1 == s2)
                   1022:                return 0;
                   1023:
                   1024:        return vcmp(s1, s2);
                   1025: }
                   1026:
                   1027: /*
                   1028:  * Check for minus infinity
                   1029:  */
                   1030: static inline bool
                   1031: huge_minus(double d, int err1)
                   1032: {
                   1033:        if (err1 == ERANGE)
                   1034:                if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
                   1035:                        return 1;
                   1036:
                   1037:        return 0;
                   1038: }
                   1039:
                   1040: /*
                   1041:  * Check for plus infinity
                   1042:  */
                   1043: static inline bool
                   1044: huge_plus(double d, int err1)
                   1045: {
                   1046:        if (err1 == ERANGE)
                   1047:                if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
                   1048:                        return 1;
                   1049:
                   1050:        return 0;
                   1051: }
                   1052:
                   1053: /*
                   1054:  * Check whether a function is a NAN
                   1055:  */
                   1056: static bool
                   1057: is_nan(double d)
                   1058: {
1.3       miod     1059: #if defined(NAN)
1.1       millert  1060:        return (d == NAN || isnan(d));
1.3       miod     1061: #else
                   1062:        return (isnan(d));
                   1063: #endif
1.1       millert  1064: }
                   1065:
                   1066: /*
                   1067:  * Compare two NANs
                   1068:  */
                   1069: static int
                   1070: cmp_nans(double d1, double d2)
                   1071: {
                   1072:        if (d1 == d2)
                   1073:                return 0;
                   1074:        return d1 < d2 ? -1 : 1;
                   1075: }
                   1076:
                   1077: /*
                   1078:  * Implements general numeric sort (-g).
                   1079:  */
                   1080: static int
                   1081: gnumcoll(struct key_value *kv1, struct key_value *kv2,
                   1082:     size_t offset __unused)
                   1083: {
                   1084:        double d1, d2;
                   1085:        int err1, err2;
                   1086:        bool empty1, empty2, key1_read, key2_read;
                   1087:
                   1088:        d1 = d2 = 0;
                   1089:        err1 = err2 = 0;
                   1090:        key1_read = key2_read = false;
                   1091:
                   1092:        if (debug_sort) {
                   1093:                bwsprintf(stdout, kv1->k, "; k1=<", ">");
                   1094:                bwsprintf(stdout, kv2->k, "; k2=<", ">");
                   1095:        }
                   1096:
                   1097:        if (kv1->hint->status == HS_UNINITIALIZED) {
                   1098:                errno = 0;
                   1099:                d1 = bwstod(kv1->k, &empty1);
                   1100:                err1 = errno;
                   1101:
                   1102:                if (empty1)
                   1103:                        kv1->hint->v.gh.notnum = true;
                   1104:                else if (err1 == 0) {
                   1105:                        kv1->hint->v.gh.d = d1;
                   1106:                        kv1->hint->v.gh.nan = is_nan(d1);
                   1107:                        kv1->hint->status = HS_INITIALIZED;
                   1108:                } else
                   1109:                        kv1->hint->status = HS_ERROR;
                   1110:
                   1111:                key1_read = true;
                   1112:        }
                   1113:
                   1114:        if (kv2->hint->status == HS_UNINITIALIZED) {
                   1115:                errno = 0;
                   1116:                d2 = bwstod(kv2->k, &empty2);
                   1117:                err2 = errno;
                   1118:
                   1119:                if (empty2)
                   1120:                        kv2->hint->v.gh.notnum = true;
                   1121:                else if (err2 == 0) {
                   1122:                        kv2->hint->v.gh.d = d2;
                   1123:                        kv2->hint->v.gh.nan = is_nan(d2);
                   1124:                        kv2->hint->status = HS_INITIALIZED;
                   1125:                } else
                   1126:                        kv2->hint->status = HS_ERROR;
                   1127:
                   1128:                key2_read = true;
                   1129:        }
                   1130:
                   1131:        if (kv1->hint->status == HS_INITIALIZED &&
                   1132:            kv2->hint->status == HS_INITIALIZED) {
                   1133:                if (kv1->hint->v.gh.notnum)
                   1134:                        return kv2->hint->v.gh.notnum ? 0 : -1;
                   1135:                else if (kv2->hint->v.gh.notnum)
                   1136:                        return 1;
                   1137:
                   1138:                if (kv1->hint->v.gh.nan)
                   1139:                        return kv2->hint->v.gh.nan ?
                   1140:                            cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
                   1141:                else if (kv2->hint->v.gh.nan)
                   1142:                        return 1;
                   1143:
                   1144:                d1 = kv1->hint->v.gh.d;
                   1145:                d2 = kv2->hint->v.gh.d;
                   1146:
                   1147:                if (d1 < d2)
                   1148:                        return -1;
                   1149:                else if (d1 > d2)
                   1150:                        return 1;
                   1151:                else
                   1152:                        return 0;
                   1153:        }
                   1154:
                   1155:        if (!key1_read) {
                   1156:                errno = 0;
                   1157:                d1 = bwstod(kv1->k, &empty1);
                   1158:                err1 = errno;
                   1159:        }
                   1160:
                   1161:        if (!key2_read) {
                   1162:                errno = 0;
                   1163:                d2 = bwstod(kv2->k, &empty2);
                   1164:                err2 = errno;
                   1165:        }
                   1166:
                   1167:        /* Non-value case: */
                   1168:        if (empty1)
                   1169:                return empty2 ? 0 : -1;
                   1170:        else if (empty2)
                   1171:                return 1;
                   1172:
                   1173:        /* NAN case */
                   1174:        if (is_nan(d1))
                   1175:                return is_nan(d2) ? cmp_nans(d1, d2) : -1;
                   1176:        else if (is_nan(d2))
                   1177:                return 1;
                   1178:
                   1179:        /* Infinities */
                   1180:        if (err1 == ERANGE || err2 == ERANGE) {
                   1181:                /* Minus infinity case */
                   1182:                if (huge_minus(d1, err1)) {
                   1183:                        if (huge_minus(d2, err2)) {
                   1184:                                if (d1 == d2)
                   1185:                                        return 0;
                   1186:                                return d1 < d2 ? -1 : 1;
                   1187:                        } else
                   1188:                                return -1;
                   1189:
                   1190:                } else if (huge_minus(d2, err2)) {
                   1191:                        if (huge_minus(d1, err1)) {
                   1192:                                if (d1 == d2)
                   1193:                                        return 0;
                   1194:                                return d1 < d2 ? -1 : 1;
                   1195:                        } else
                   1196:                                return 1;
                   1197:                }
                   1198:
                   1199:                /* Plus infinity case */
                   1200:                if (huge_plus(d1, err1)) {
                   1201:                        if (huge_plus(d2, err2)) {
                   1202:                                if (d1 == d2)
                   1203:                                        return 0;
                   1204:                                return d1 < d2 ? -1 : 1;
                   1205:                        } else
                   1206:                                return 1;
                   1207:                } else if (huge_plus(d2, err2)) {
                   1208:                        if (huge_plus(d1, err1)) {
                   1209:                                if (d1 == d2)
                   1210:                                        return 0;
                   1211:                                return d1 < d2 ? -1 : 1;
                   1212:                        } else
                   1213:                                return -1;
                   1214:                }
                   1215:        }
                   1216:
                   1217:        if (d1 == d2)
                   1218:                return 0;
                   1219:        return d1 < d2 ? -1 : 1;
                   1220: }
                   1221:
                   1222: /*
                   1223:  * Implements month sort (-M).
                   1224:  */
                   1225: static int
                   1226: monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
                   1227: {
                   1228:        int val1, val2;
                   1229:        bool key1_read, key2_read;
                   1230:
                   1231:        val1 = val2 = 0;
                   1232:        key1_read = key2_read = false;
                   1233:
                   1234:        if (debug_sort) {
                   1235:                bwsprintf(stdout, kv1->k, "; k1=<", ">");
                   1236:                bwsprintf(stdout, kv2->k, "; k2=<", ">");
                   1237:        }
                   1238:
                   1239:        if (kv1->hint->status == HS_UNINITIALIZED) {
                   1240:                kv1->hint->v.Mh.m = bws_month_score(kv1->k);
                   1241:                key1_read = true;
                   1242:                kv1->hint->status = HS_INITIALIZED;
                   1243:        }
                   1244:
                   1245:        if (kv2->hint->status == HS_UNINITIALIZED) {
                   1246:                kv2->hint->v.Mh.m = bws_month_score(kv2->k);
                   1247:                key2_read = true;
                   1248:                kv2->hint->status = HS_INITIALIZED;
                   1249:        }
                   1250:
                   1251:        if (kv1->hint->status == HS_INITIALIZED) {
                   1252:                val1 = kv1->hint->v.Mh.m;
                   1253:                key1_read = true;
                   1254:        }
                   1255:
                   1256:        if (kv2->hint->status == HS_INITIALIZED) {
                   1257:                val2 = kv2->hint->v.Mh.m;
                   1258:                key2_read = true;
                   1259:        }
                   1260:
                   1261:        if (!key1_read)
                   1262:                val1 = bws_month_score(kv1->k);
                   1263:        if (!key2_read)
                   1264:                val2 = bws_month_score(kv2->k);
                   1265:
                   1266:        if (val1 == val2)
                   1267:                return 0;
                   1268:        return val1 < val2 ? -1 : 1;
                   1269: }