src/usr.bin/sort/coll.c - annotate

Return to coll.c CVS log
Up to [local] / src / usr.bin / sort
Annotation of src/usr.bin/sort/coll.c, Revision 1.9

1.9     ! millert     1: /*     $OpenBSD: coll.c,v 1.8 2015/04/02 22:14:51 deraadt Exp $        */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
                      5:  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29:
                     30: #include <sys/types.h>
                     31:
                     32: #include <errno.h>
                     33: #include <err.h>
                     34: #include <langinfo.h>
                     35: #include <limits.h>
                     36: #include <math.h>
                     37: #include <md5.h>
                     38: #include <stdlib.h>
                     39: #include <string.h>
                     40: #include <wchar.h>
                     41: #include <wctype.h>
                     42:
                     43: #include "coll.h"
                     44: #include "vsort.h"
                     45:
                     46: struct key_specs *keys;
                     47: size_t keys_num = 0;
                     48:
                     49: wint_t symbol_decimal_point = L'.';
                     50: /* there is no default thousands separator in collate rules: */
                     51: wint_t symbol_thousands_sep = 0;
                     52: wint_t symbol_negative_sign = L'-';
                     53: wint_t symbol_positive_sign = L'+';
                     54:
                     55: static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
                     56: static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
                     57: static int monthcoll(struct key_value*, struct key_value *, size_t offset);
                     58: static int numcoll(struct key_value*, struct key_value *, size_t offset);
                     59: static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
                     60: static int randomcoll(struct key_value*, struct key_value *, size_t offset);
                     61: static int versioncoll(struct key_value*, struct key_value *, size_t offset);
                     62:
                     63: /*
                     64:  * Allocate keys array
                     65:  */
                     66: struct keys_array *
                     67: keys_array_alloc(void)
                     68: {
                     69:        struct keys_array *ka;
                     70:        size_t sz;
                     71:
                     72:        sz = keys_array_size();
1.2       millert    73:        ka = sort_calloc(1, sz);
1.1       millert    74:
                     75:        return ka;
                     76: }
                     77:
                     78: /*
1.8       deraadt    79:  * Calculate whether we need key hint space
1.1       millert    80:  */
                     81: static size_t
                     82: key_hint_size(void)
                     83: {
                     84:        return need_hint ? sizeof(struct key_hint) : 0;
                     85: }
                     86:
                     87: /*
                     88:  * Calculate keys array size
                     89:  */
                     90: size_t
                     91: keys_array_size(void)
                     92: {
                     93:        return keys_num * (sizeof(struct key_value) + key_hint_size());
                     94: }
                     95:
                     96: /*
                     97:  * Clean data of keys array
                     98:  */
                     99: void
                    100: clean_keys_array(const struct bwstring *s, struct keys_array *ka)
                    101: {
                    102:        if (ka) {
                    103:                size_t i;
                    104:
                    105:                for (i = 0; i < keys_num; ++i)
                    106:                        if (ka->key[i].k && ka->key[i].k != s)
                    107:                                bwsfree(ka->key[i].k);
                    108:                memset(ka, 0, keys_array_size());
                    109:        }
                    110: }
                    111:
                    112: /*
                    113:  * Set value of a key in the keys set
                    114:  */
                    115: void
                    116: set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
                    117: {
                    118:        if (ka && keys_num > ind) {
                    119:                struct key_value *kv;
                    120:
                    121:                kv = &(ka->key[ind]);
                    122:
1.7       tobias    123:                if (kv->k != s)
1.1       millert   124:                        bwsfree(kv->k);
                    125:                kv->k = s;
                    126:        }
                    127: }
                    128:
                    129: /*
                    130:  * Initialize a sort list item
                    131:  */
                    132: struct sort_list_item *
                    133: sort_list_item_alloc(void)
                    134: {
                    135:        struct sort_list_item *si;
                    136:        size_t sz;
                    137:
                    138:        sz = sizeof(struct sort_list_item) + keys_array_size();
1.2       millert   139:        si = sort_calloc(1, sz);
1.1       millert   140:
                    141:        return si;
                    142: }
                    143:
                    144: size_t
                    145: sort_list_item_size(struct sort_list_item *si)
                    146: {
                    147:        size_t i, ret = 0;
                    148:
                    149:        if (si) {
                    150:                ret = sizeof(struct sort_list_item) + keys_array_size();
                    151:                if (si->str)
                    152:                        ret += bws_memsize(si->str);
                    153:                for (i = 0; i < keys_num; ++i) {
                    154:                        struct key_value *kv;
                    155:
                    156:                        kv = &(si->ka.key[i]);
                    157:
                    158:                        if (kv->k != si->str)
                    159:                                ret += bws_memsize(kv->k);
                    160:                }
                    161:        }
                    162:        return ret;
                    163: }
                    164:
                    165: /*
                    166:  * Calculate key for a sort list item
                    167:  */
                    168: static void
                    169: sort_list_item_make_key(struct sort_list_item *si)
                    170: {
                    171:        preproc(si->str, &(si->ka));
                    172: }
                    173:
                    174: /*
                    175:  * Set value of a sort list item.
                    176:  * Return combined string and keys memory size.
                    177:  */
                    178: void
                    179: sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
                    180: {
                    181:        if (si) {
                    182:                clean_keys_array(si->str, &(si->ka));
                    183:                if (si->str) {
                    184:                        if (si->str == str) {
                    185:                                /* we are trying to reset the same string */
                    186:                                return;
                    187:                        } else {
                    188:                                bwsfree(si->str);
                    189:                                si->str = NULL;
                    190:                        }
                    191:                }
                    192:                si->str = str;
                    193:                sort_list_item_make_key(si);
                    194:        }
                    195: }
                    196:
                    197: /*
                    198:  * De-allocate a sort list item object memory
                    199:  */
                    200: void
                    201: sort_list_item_clean(struct sort_list_item *si)
                    202: {
                    203:        if (si) {
                    204:                clean_keys_array(si->str, &(si->ka));
                    205:                if (si->str) {
                    206:                        bwsfree(si->str);
                    207:                        si->str = NULL;
                    208:                }
                    209:        }
                    210: }
                    211:
                    212: /*
                    213:  * Skip columns according to specs
                    214:  */
                    215: static size_t
                    216: skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
                    217:     bool skip_blanks, bool *empty_key)
                    218: {
                    219:        if (cols < 1)
                    220:                return BWSLEN(s) + 1;
                    221:
                    222:        if (skip_blanks)
                    223:                while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
                    224:                        ++start;
                    225:
                    226:        while (start < BWSLEN(s) && cols > 1) {
                    227:                --cols;
                    228:                ++start;
                    229:        }
                    230:
                    231:        if (start >= BWSLEN(s))
                    232:                *empty_key = true;
                    233:
                    234:        return start;
                    235: }
                    236:
                    237: /*
                    238:  * Skip fields according to specs
                    239:  */
                    240: static size_t
                    241: skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
                    242: {
                    243:        if (fields < 2) {
                    244:                if (BWSLEN(s) == 0)
                    245:                        *empty_field = true;
                    246:                return 0;
                    247:        } else if (!(sort_opts_vals.tflag)) {
                    248:                size_t cpos = 0;
                    249:                bool pb = true;
                    250:
                    251:                while (cpos < BWSLEN(s)) {
                    252:                        bool isblank;
                    253:
                    254:                        isblank = iswblank(BWS_GET(s, cpos));
                    255:
                    256:                        if (isblank && !pb) {
                    257:                                --fields;
                    258:                                if (fields <= 1)
                    259:                                        return cpos;
                    260:                        }
                    261:                        pb = isblank;
                    262:                        ++cpos;
                    263:                }
                    264:                if (fields > 1)
                    265:                        *empty_field = true;
                    266:                return cpos;
                    267:        } else {
                    268:                size_t cpos = 0;
                    269:
                    270:                while (cpos < BWSLEN(s)) {
                    271:                        if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
                    272:                                --fields;
                    273:                                if (fields <= 1)
                    274:                                        return cpos + 1;
                    275:                        }
                    276:                        ++cpos;
                    277:                }
                    278:                if (fields > 1)
                    279:                        *empty_field = true;
                    280:                return cpos;
                    281:        }
                    282: }
                    283:
                    284: /*
                    285:  * Find fields start
                    286:  */
                    287: static void
                    288: find_field_start(const struct bwstring *s, struct key_specs *ks,
                    289:     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
                    290: {
                    291:        *field_start = skip_fields_to_start(s, ks->f1, empty_field);
                    292:        if (!*empty_field)
                    293:                *key_start = skip_cols_to_start(s, ks->c1, *field_start,
                    294:                    ks->pos1b, empty_key);
                    295:        else
                    296:                *empty_key = true;
                    297: }
                    298:
                    299: /*
                    300:  * Find end key position
                    301:  */
                    302: static size_t
                    303: find_field_end(const struct bwstring *s, struct key_specs *ks)
                    304: {
                    305:        size_t f2, next_field_start, pos_end;
                    306:        bool empty_field, empty_key;
                    307:
                    308:        empty_field = false;
                    309:        empty_key = false;
                    310:        f2 = ks->f2;
                    311:
                    312:        if (f2 == 0)
                    313:                return BWSLEN(s) + 1;
                    314:        else {
                    315:                if (ks->c2 == 0) {
                    316:                        next_field_start = skip_fields_to_start(s, f2 + 1,
                    317:                            &empty_field);
                    318:                        if ((next_field_start > 0) && sort_opts_vals.tflag &&
                    319:                            ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
                    320:                            next_field_start - 1)))
                    321:                                --next_field_start;
                    322:                } else
                    323:                        next_field_start = skip_fields_to_start(s, f2,
                    324:                            &empty_field);
                    325:        }
                    326:
                    327:        if (empty_field || (next_field_start >= BWSLEN(s)))
                    328:                return BWSLEN(s) + 1;
                    329:
                    330:        if (ks->c2) {
                    331:                pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
                    332:                    ks->pos2b, &empty_key);
                    333:                if (pos_end < BWSLEN(s))
                    334:                        ++pos_end;
                    335:        } else
                    336:                pos_end = next_field_start;
                    337:
                    338:        return pos_end;
                    339: }
                    340:
                    341: /*
                    342:  * Cut a field according to the key specs
                    343:  */
                    344: static struct bwstring *
                    345: cut_field(const struct bwstring *s, struct key_specs *ks)
                    346: {
                    347:        struct bwstring *ret = NULL;
                    348:
                    349:        if (s && ks) {
                    350:                size_t field_start, key_end, key_start, sz;
                    351:                bool empty_field, empty_key;
                    352:
                    353:                field_start = 0;
                    354:                key_start = 0;
                    355:                empty_field = false;
                    356:                empty_key = false;
                    357:
                    358:                find_field_start(s, ks, &field_start, &key_start,
                    359:                    &empty_field, &empty_key);
                    360:
                    361:                if (empty_key)
                    362:                        sz = 0;
                    363:                else {
                    364:                        key_end = find_field_end(s, ks);
                    365:                        sz = (key_end < key_start) ? 0 : (key_end - key_start);
                    366:                }
                    367:
                    368:                ret = bwsalloc(sz);
                    369:                if (sz)
                    370:                        bwsnocpy(ret, s, key_start, sz);
                    371:        } else
                    372:                ret = bwsalloc(0);
                    373:
                    374:        return ret;
                    375: }
                    376:
                    377: /*
                    378:  * Preprocesses a line applying the necessary transformations
                    379:  * specified by command line options and returns the preprocessed
                    380:  * string, which can be used to compare.
                    381:  */
                    382: int
                    383: preproc(struct bwstring *s, struct keys_array *ka)
                    384: {
                    385:        if (sort_opts_vals.kflag) {
                    386:                size_t i;
                    387:                for (i = 0; i < keys_num; i++) {
                    388:                        struct bwstring *key;
                    389:                        struct key_specs *kspecs;
                    390:                        struct sort_mods *sm;
                    391:
                    392:                        kspecs = &(keys[i]);
                    393:                        key = cut_field(s, kspecs);
                    394:
                    395:                        sm = &(kspecs->sm);
                    396:                        if (sm->dflag)
                    397:                                key = dictionary_order(key);
                    398:                        else if (sm->iflag)
                    399:                                key = ignore_nonprinting(key);
                    400:                        if (sm->fflag || sm->Mflag)
                    401:                                key = ignore_case(key);
                    402:
                    403:                        set_key_on_keys_array(ka, key, i);
                    404:                }
                    405:        } else {
                    406:                struct bwstring *ret = NULL;
                    407:                struct sort_mods *sm = default_sort_mods;
                    408:
1.9     ! millert   409: #ifdef GNUSORT_COMPATIBILITY
1.1       millert   410:                if (sm->bflag) {
                    411:                        if (ret == NULL)
                    412:                                ret = bwsdup(s);
                    413:                        ret = ignore_leading_blanks(ret);
                    414:                }
1.9     ! millert   415: #endif
1.1       millert   416:                if (sm->dflag) {
                    417:                        if (ret == NULL)
                    418:                                ret = bwsdup(s);
                    419:                        ret = dictionary_order(ret);
                    420:                } else if (sm->iflag) {
                    421:                        if (ret == NULL)
                    422:                                ret = bwsdup(s);
                    423:                        ret = ignore_nonprinting(ret);
                    424:                }
                    425:                if (sm->fflag || sm->Mflag) {
                    426:                        if (ret == NULL)
                    427:                                ret = bwsdup(s);
                    428:                        ret = ignore_case(ret);
                    429:                }
                    430:                if (ret == NULL)
                    431:                        set_key_on_keys_array(ka, s, 0);
                    432:                else
                    433:                        set_key_on_keys_array(ka, ret, 0);
                    434:        }
                    435:
                    436:        return 0;
                    437: }
                    438:
                    439: cmpcoll_t
                    440: get_sort_func(struct sort_mods *sm)
                    441: {
                    442:        if (sm->nflag)
                    443:                return numcoll;
                    444:        else if (sm->hflag)
                    445:                return hnumcoll;
                    446:        else if (sm->gflag)
                    447:                return gnumcoll;
                    448:        else if (sm->Mflag)
                    449:                return monthcoll;
                    450:        else if (sm->Rflag)
                    451:                return randomcoll;
                    452:        else if (sm->Vflag)
                    453:                return versioncoll;
                    454:        else
                    455:                return wstrcoll;
                    456: }
                    457:
                    458: /*
                    459:  * Compares the given strings.  Returns a positive number if
                    460:  * the first precedes the second, a negative number if the second is
                    461:  * the preceding one, and zero if they are equal.  This function calls
                    462:  * the underlying collate functions, which done the actual comparison.
                    463:  */
                    464: int
                    465: key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
                    466: {
                    467:        struct sort_mods *sm;
                    468:        int res = 0;
                    469:        size_t i;
                    470:
                    471:        for (i = 0; i < keys_num; ++i) {
                    472:                sm = &(keys[i].sm);
                    473:
                    474:                if (sm->rflag)
                    475:                        res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
                    476:                else
                    477:                        res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
                    478:
                    479:                if (res)
                    480:                        break;
                    481:
                    482:                /* offset applies to only the first key */
                    483:                offset = 0;
                    484:        }
                    485:        return res;
                    486: }
                    487:
                    488: /*
                    489:  * Compare two strings.
                    490:  * Plain symbol-by-symbol comparison.
                    491:  */
                    492: int
                    493: top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
                    494: {
                    495:        if (default_sort_mods->rflag) {
                    496:                const struct bwstring *tmp;
                    497:
                    498:                tmp = s1;
                    499:                s1 = s2;
                    500:                s2 = tmp;
                    501:        }
                    502:
                    503:        return bwscoll(s1, s2, 0);
                    504: }
                    505:
                    506: /*
                    507:  * Compare a string and a sort list item, according to the sort specs.
                    508:  */
                    509: int
                    510: str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
                    511: {
                    512:        struct keys_array *ka1;
                    513:        int ret = 0;
                    514:
                    515:        ka1 = keys_array_alloc();
                    516:
                    517:        preproc(str1, ka1);
                    518:
                    519:        sort_list_item_make_key(*ss2);
                    520:
                    521:        if (debug_sort) {
                    522:                bwsprintf(stdout, str1, "; s1=<", ">");
                    523:                bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
                    524:        }
                    525:
                    526:        ret = key_coll(ka1, &((*ss2)->ka), 0);
                    527:
                    528:        if (debug_sort)
                    529:                printf("; cmp1=%d", ret);
                    530:
                    531:        clean_keys_array(str1, ka1);
                    532:        sort_free(ka1);
                    533:
                    534:        if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
                    535:                ret = top_level_str_coll(str1, ((*ss2)->str));
                    536:                if (debug_sort)
                    537:                        printf("; cmp2=%d", ret);
                    538:        }
                    539:
                    540:        if (debug_sort)
                    541:                putchar('\n');
                    542:
                    543:        return ret;
                    544: }
                    545:
                    546: /*
                    547:  * Compare two sort list items, according to the sort specs.
                    548:  */
                    549: int
                    550: list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
                    551:     size_t offset)
                    552: {
                    553:        int ret;
                    554:
                    555:        ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
                    556:
                    557:        if (debug_sort) {
                    558:                if (offset)
                    559:                        printf("; offset=%d", (int) offset);
                    560:                bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
                    561:                bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
                    562:                printf("; cmp1=%d\n", ret);
                    563:        }
                    564:
                    565:        if (ret)
                    566:                return ret;
                    567:
                    568:        if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
                    569:                ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
                    570:                if (debug_sort)
                    571:                        printf("; cmp2=%d\n", ret);
                    572:        }
                    573:
                    574:        return ret;
                    575: }
                    576:
                    577: /*
                    578:  * Compare two sort list items, according to the sort specs.
                    579:  */
                    580: int
                    581: list_coll(const void *ss1, const void *ss2)
                    582: {
                    583:        return list_coll_offset((struct sort_list_item **)ss1,
                    584:            (struct sort_list_item **)ss2, 0);
                    585: }
                    586:
/*
 * Define list_coll_N(): a two-argument comparator that calls
 * list_coll_offset() with the offset fixed to N.
 */
#define LSCDEF(N)							\
static int								\
list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)	\
{									\
	return list_coll_offset(ss1, ss2, N);				\
}

/* one comparator for each offset from 0 to 20 */
LSCDEF(0)
LSCDEF(1)
LSCDEF(2)
LSCDEF(3)
LSCDEF(4)
LSCDEF(5)
LSCDEF(6)
LSCDEF(7)
LSCDEF(8)
LSCDEF(9)
LSCDEF(10)
LSCDEF(11)
LSCDEF(12)
LSCDEF(13)
LSCDEF(14)
LSCDEF(15)
LSCDEF(16)
LSCDEF(17)
LSCDEF(18)
LSCDEF(19)
LSCDEF(20)
                    616:
                    617: listcoll_t
                    618: get_list_call_func(size_t offset)
                    619: {
                    620:        static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
                    621:            list_coll_2, list_coll_3, list_coll_4, list_coll_5,
                    622:            list_coll_6, list_coll_7, list_coll_8, list_coll_9,
                    623:            list_coll_10, list_coll_11, list_coll_12, list_coll_13,
                    624:            list_coll_14, list_coll_15, list_coll_16, list_coll_17,
                    625:            list_coll_18, list_coll_19, list_coll_20 };
                    626:
                    627:        if (offset <= 20)
                    628:                return lsarray[offset];
                    629:
                    630:        return list_coll_0;
                    631: }
                    632:
                    633: /*
                    634:  * Compare two sort list items, only by their original string.
                    635:  */
                    636: int
                    637: list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
                    638: {
                    639:        return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
                    640: }
                    641:
                    /*
                     * Upper bound on the number of digits stored for either part of a
                     * number (the part before or the part after the decimal point).
                     */
                    #define MAX_NUM_SIZE 128
                    646:
                    /*
                     * Translate a unit suffix character into its rank and store it in *si:
                     * 'k'/'K' = 1, 'M' = 2, 'G' = 3, 'T' = 4, 'P' = 5, 'E' = 6, 'Z' = 7,
                     * 'Y' = 8.  Any other character (including NUL) yields 0.
                     */
                    static void
                    setsuffix(wchar_t c, unsigned char *si)
                    {
                            static const wchar_t units[] = {
                                    L'K', L'M', L'G', L'T', L'P', L'E', L'Z', L'Y'
                            };
                            const size_t nunits = sizeof(units) / sizeof(units[0]);
                            size_t i;

                            /* Kilo is the only unit accepted in lower case as well. */
                            if (c == L'k')
                                    c = L'K';

                            *si = 0;
                            for (i = 0; i < nunits; i++) {
                                    if (units[i] == c) {
                                            *si = (unsigned char)(i + 1);
                                            break;
                                    }
                            }
                    }
                    683:
                    684: /*
                    685:  * Read string s and parse the string into a fixed-decimal-point number.
                    686:  * sign equals -1 if the number is negative (explicit plus is not allowed,
                    687:  * according to GNU sort's "info sort".
                    688:  * The number part before decimal point is in the smain, after the decimal
                    689:  * point is in sfrac, tail is the pointer to the remainder of the string.
                    690:  */
                    691: static int
                    692: read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
                    693: {
                    694:        bwstring_iterator s;
                    695:
                    696:        s = bws_begin(s0);
                    697:
                    698:        /* always end the fraction with zero, even if we have no fraction */
                    699:        sfrac[0] = 0;
                    700:
                    701:        while (iswblank(bws_get_iter_value(s)))
                    702:                s = bws_iterator_inc(s, 1);
                    703:
                    704:        if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
                    705:                *sign = -1;
                    706:                s = bws_iterator_inc(s, 1);
                    707:        }
                    708:
                    709:        // This is '0', not '\0', do not change this
                    710:        while (iswdigit(bws_get_iter_value(s)) &&
                    711:            (bws_get_iter_value(s) == L'0'))
                    712:                s = bws_iterator_inc(s, 1);
                    713:
                    714:        while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
                    715:                if (iswdigit(bws_get_iter_value(s))) {
                    716:                        smain[*main_len] = bws_get_iter_value(s);
                    717:                        s = bws_iterator_inc(s, 1);
                    718:                        *main_len += 1;
                    719:                } else if (symbol_thousands_sep &&
                    720:                    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
                    721:                        s = bws_iterator_inc(s, 1);
                    722:                else
                    723:                        break;
                    724:        }
                    725:
                    726:        smain[*main_len] = 0;
                    727:
                    728:        if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
                    729:                s = bws_iterator_inc(s, 1);
                    730:                while (iswdigit(bws_get_iter_value(s)) &&
                    731:                    *frac_len < MAX_NUM_SIZE) {
                    732:                        sfrac[*frac_len] = bws_get_iter_value(s);
                    733:                        s = bws_iterator_inc(s, 1);
                    734:                        *frac_len += 1;
                    735:                }
                    736:                sfrac[*frac_len] = 0;
                    737:
                    738:                while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
                    739:                        --(*frac_len);
                    740:                        sfrac[*frac_len] = L'\0';
                    741:                }
                    742:        }
                    743:
                    744:        setsuffix(bws_get_iter_value(s), si);
                    745:
                    746:        if ((*main_len + *frac_len) == 0)
                    747:                *sign = 0;
                    748:
                    749:        return 0;
                    750: }
                    751:
                    752: /*
                    753:  * Implements string sort.
                    754:  */
                    755: static int
                    756: wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
                    757: {
                    758:
                    759:        if (debug_sort) {
                    760:                if (offset)
                    761:                        printf("; offset=%d\n", (int) offset);
                    762:                bwsprintf(stdout, kv1->k, "; k1=<", ">");
                    763:                printf("(%zu)", BWSLEN(kv1->k));
                    764:                bwsprintf(stdout, kv2->k, ", k2=<", ">");
                    765:                printf("(%zu)", BWSLEN(kv2->k));
                    766:        }
                    767:
                    768:        return bwscoll(kv1->k, kv2->k, offset);
                    769: }
                    770:
                    /*
                     * Compare two suffix ranks.  Returns a negative value, zero or a
                     * positive value when si1 is respectively smaller than, equal to or
                     * greater than si2.
                     *
                     * The operands are widened to int: converting them to plain char would
                     * be implementation-defined and would misorder values above 127 where
                     * char is signed.
                     */
                    static inline int
                    cmpsuffix(unsigned char si1, unsigned char si2)
                    {
                            return (int)si1 - (int)si2;
                    }
                    779:
                    780: /*
                    781:  * Implements numeric sort for -n and -h.
                    782:  */
                    783: static int
                    784: numcoll_impl(struct key_value *kv1, struct key_value *kv2,
                    785:     size_t offset __unused, bool use_suffix)
                    786: {
                    787:        struct bwstring *s1, *s2;
                    788:        wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
                    789:        wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
                    790:        int cmp_res, sign1, sign2;
                    791:        size_t frac1, frac2, main1, main2;
                    792:        unsigned char SI1, SI2;
                    793:        bool e1, e2, key1_read, key2_read;
                    794:
                    795:        s1 = kv1->k;
                    796:        s2 = kv2->k;
                    797:        sign1 = sign2 = 0;
                    798:        main1 = main2 = 0;
                    799:        frac1 = frac2 = 0;
                    800:
                    801:        key1_read = key2_read = false;
                    802:
                    803:        if (debug_sort) {
                    804:                bwsprintf(stdout, s1, "; k1=<", ">");
                    805:                bwsprintf(stdout, s2, ", k2=<", ">");
                    806:        }
                    807:
                    808:        if (s1 == s2)
                    809:                return 0;
                    810:
                    811:        if (kv1->hint->status == HS_UNINITIALIZED) {
                    812:                /* read the number from the string */
                    813:                read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
                    814:                key1_read = true;
                    815:                kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
                    816:                if (main1 < 1 && frac1 < 1)
                    817:                        kv1->hint->v.nh.empty=true;
                    818:                kv1->hint->v.nh.si = SI1;
                    819:                kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
                    820:                    HS_INITIALIZED : HS_ERROR;
                    821:                kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
                    822:        }
                    823:
                    824:        if (kv2->hint->status == HS_UNINITIALIZED) {
                    825:                /* read the number from the string */
                    826:                read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
                    827:                key2_read = true;
                    828:                kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
                    829:                if (main2 < 1 && frac2 < 1)
                    830:                        kv2->hint->v.nh.empty=true;
                    831:                kv2->hint->v.nh.si = SI2;
                    832:                kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
                    833:                    HS_INITIALIZED : HS_ERROR;
                    834:                kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
                    835:        }
                    836:
                    837:        if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
                    838:            HS_INITIALIZED) {
                    839:                unsigned long long n1, n2;
                    840:                bool neg1, neg2;
                    841:
                    842:                e1 = kv1->hint->v.nh.empty;
                    843:                e2 = kv2->hint->v.nh.empty;
                    844:
                    845:                if (e1 && e2)
                    846:                        return 0;
                    847:
                    848:                neg1 = kv1->hint->v.nh.neg;
                    849:                neg2 = kv2->hint->v.nh.neg;
                    850:
                    851:                if (neg1 && !neg2)
                    852:                        return -1;
                    853:                if (neg2 && !neg1)
                    854:                        return 1;
                    855:
                    856:                if (e1)
                    857:                        return neg2 ? 1 : -1;
                    858:                else if (e2)
                    859:                        return neg1 ? -1 : 1;
                    860:
                    861:
                    862:                if (use_suffix) {
                    863:                        cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
                    864:                        if (cmp_res)
                    865:                                return neg1 ? -cmp_res : cmp_res;
                    866:                }
                    867:
                    868:                n1 = kv1->hint->v.nh.n1;
                    869:                n2 = kv2->hint->v.nh.n1;
                    870:                if (n1 < n2)
                    871:                        return neg1 ? 1 : -1;
                    872:                else if (n1 > n2)
                    873:                        return neg1 ? -1 : 1;
                    874:        }
                    875:
                    876:        /* read the numbers from the strings */
                    877:        if (!key1_read)
                    878:                read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
                    879:        if (!key2_read)
                    880:                read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
                    881:
                    882:        e1 = ((main1 + frac1) == 0);
                    883:        e2 = ((main2 + frac2) == 0);
                    884:
                    885:        if (e1 && e2)
                    886:                return 0;
                    887:
                    888:        /* we know the result if the signs are different */
                    889:        if (sign1 < 0 && sign2 >= 0)
                    890:                return -1;
                    891:        if (sign1 >= 0 && sign2 < 0)
                    892:                return 1;
                    893:
                    894:        if (e1)
                    895:                return (sign2 < 0) ? +1 : -1;
                    896:        else if (e2)
                    897:                return (sign1 < 0) ? -1 : 1;
                    898:
                    899:        if (use_suffix) {
                    900:                cmp_res = cmpsuffix(SI1, SI2);
                    901:                if (cmp_res)
                    902:                        return (sign1 < 0) ? -cmp_res : cmp_res;
                    903:        }
                    904:
                    905:        /* if both numbers are empty assume that the strings are equal */
                    906:        if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
                    907:                return 0;
                    908:
                    909:        /*
                    910:         * if the main part is of different size, we know the result
                    911:         * (because the leading zeros are removed)
                    912:         */
                    913:        if (main1 < main2)
                    914:                cmp_res = -1;
                    915:        else if (main1 > main2)
                    916:                cmp_res = +1;
                    917:        /* if the sizes are equal then simple non-collate string compare gives the correct result */
                    918:        else
                    919:                cmp_res = wcscmp(smain1, smain2);
                    920:
                    921:        /* check fraction */
                    922:        if (!cmp_res)
                    923:                cmp_res = wcscmp(sfrac1, sfrac2);
                    924:
                    925:        if (!cmp_res)
                    926:                return 0;
                    927:
                    928:        /* reverse result if the signs are negative */
                    929:        if (sign1 < 0 && sign2 < 0)
                    930:                cmp_res = -cmp_res;
                    931:
                    932:        return cmp_res;
                    933: }
                    934:
                    /*
                     * Implements numeric sort (-n): numcoll_impl() with suffix comparison
                     * switched off.
                     */
                    static int
                    numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
                    {
                            const bool use_suffix = false;

                            return numcoll_impl(kv1, kv2, offset, use_suffix);
                    }
                    943:
                    /*
                     * Implements 'human' numeric sort (-h): numcoll_impl() with suffix
                     * comparison switched on.
                     */
                    static int
                    hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
                    {
                            const bool use_suffix = true;

                            return numcoll_impl(kv1, kv2, offset, use_suffix);
                    }
                    952:
                    953: /*
                    954:  * Implements random sort (-R).
                    955:  */
                    956: static int
                    957: randomcoll(struct key_value *kv1, struct key_value *kv2,
                    958:     size_t offset __unused)
                    959: {
                    960:        struct bwstring *s1, *s2;
                    961:        MD5_CTX ctx1, ctx2;
                    962:        char *b1, *b2;
                    963:
                    964:        s1 = kv1->k;
                    965:        s2 = kv2->k;
                    966:
                    967:        if (debug_sort) {
                    968:                bwsprintf(stdout, s1, "; k1=<", ">");
                    969:                bwsprintf(stdout, s2, ", k2=<", ">");
                    970:        }
                    971:
                    972:        if (s1 == s2)
                    973:                return 0;
                    974:
                    975:        memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
                    976:        memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
                    977:
                    978:        MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
                    979:        MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
                    980:        b1 = MD5End(&ctx1, NULL);
                    981:        b2 = MD5End(&ctx2, NULL);
                    982:        if (b1 == NULL) {
                    983:                if (b2 == NULL)
                    984:                        return 0;
                    985:                else {
                    986:                        sort_free(b2);
                    987:                        return -1;
                    988:                }
                    989:        } else if (b2 == NULL) {
                    990:                sort_free(b1);
                    991:                return 1;
                    992:        } else {
                    993:                int cmp_res;
                    994:
                    995:                cmp_res = strcmp(b1, b2);
                    996:                sort_free(b1);
                    997:                sort_free(b2);
                    998:
                    999:                if (!cmp_res)
                   1000:                        cmp_res = bwscoll(s1, s2, 0);
                   1001:
                   1002:                return cmp_res;
                   1003:        }
                   1004: }
                   1005:
                   1006: /*
                   1007:  * Implements version sort (-V).
                   1008:  */
                   1009: static int
                   1010: versioncoll(struct key_value *kv1, struct key_value *kv2,
                   1011:     size_t offset __unused)
                   1012: {
                   1013:        struct bwstring *s1, *s2;
                   1014:
                   1015:        s1 = kv1->k;
                   1016:        s2 = kv2->k;
                   1017:
                   1018:        if (debug_sort) {
                   1019:                bwsprintf(stdout, s1, "; k1=<", ">");
                   1020:                bwsprintf(stdout, s2, ", k2=<", ">");
                   1021:        }
                   1022:
                   1023:        if (s1 == s2)
                   1024:                return 0;
                   1025:
                   1026:        return vcmp(s1, s2);
                   1027: }
                   1028:
                   /*
                    * Check for minus infinity: true only when the conversion reported
                    * ERANGE and d equals one of the negated HUGE_VAL constants.
                    */
                   static inline bool
                   huge_minus(double d, int err1)
                   {
                           return err1 == ERANGE &&
                               (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL);
                   }
                   1041:
                   /*
                    * Check for plus infinity: true only when the conversion reported
                    * ERANGE and d equals one of the HUGE_VAL constants.
                    */
                   static inline bool
                   huge_plus(double d, int err1)
                   {
                           return err1 == ERANGE &&
                               (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL);
                   }
                   1054:
                   /*
                    * Check whether a value is a NaN.
                    *
                    * Only isnan() is used: a NaN never compares equal to anything, itself
                    * included, so an equality test against the NAN macro can never succeed.
                    */
                   static bool
                   is_nan(double d)
                   {
                           return isnan(d);
                   }
                   1067:
                   /*
                    * Compare two NANs.
                    *
                    * The relational operators are all false when a NaN is involved, so
                    * they cannot order NaNs: comparing (a, b) and (b, a) would both claim
                    * "greater".  NaNs are therefore ordered by their object representation,
                    * which makes identical NaNs compare equal and keeps the result
                    * antisymmetric.  Ordinary values still compare numerically.
                    *
                    * Returns -1, 0 or 1.
                    */
                   static int
                   cmp_nans(double d1, double d2)
                   {
                           int res;

                           if (isnan(d1) || isnan(d2)) {
                                   res = memcmp(&d1, &d2, sizeof(d1));
                                   return (res > 0) - (res < 0);
                           }

                           if (d1 == d2)
                                   return 0;
                           return d1 < d2 ? -1 : 1;
                   }
                   1078:
                   1079: /*
                   1080:  * Implements general numeric sort (-g).
                   1081:  */
                   1082: static int
                   1083: gnumcoll(struct key_value *kv1, struct key_value *kv2,
                   1084:     size_t offset __unused)
                   1085: {
                   1086:        double d1, d2;
                   1087:        int err1, err2;
                   1088:        bool empty1, empty2, key1_read, key2_read;
                   1089:
                   1090:        d1 = d2 = 0;
                   1091:        err1 = err2 = 0;
                   1092:        key1_read = key2_read = false;
                   1093:
                   1094:        if (debug_sort) {
                   1095:                bwsprintf(stdout, kv1->k, "; k1=<", ">");
                   1096:                bwsprintf(stdout, kv2->k, "; k2=<", ">");
                   1097:        }
                   1098:
                   1099:        if (kv1->hint->status == HS_UNINITIALIZED) {
                   1100:                errno = 0;
                   1101:                d1 = bwstod(kv1->k, &empty1);
                   1102:                err1 = errno;
                   1103:
                   1104:                if (empty1)
                   1105:                        kv1->hint->v.gh.notnum = true;
                   1106:                else if (err1 == 0) {
                   1107:                        kv1->hint->v.gh.d = d1;
                   1108:                        kv1->hint->v.gh.nan = is_nan(d1);
                   1109:                        kv1->hint->status = HS_INITIALIZED;
                   1110:                } else
                   1111:                        kv1->hint->status = HS_ERROR;
                   1112:
                   1113:                key1_read = true;
                   1114:        }
                   1115:
                   1116:        if (kv2->hint->status == HS_UNINITIALIZED) {
                   1117:                errno = 0;
                   1118:                d2 = bwstod(kv2->k, &empty2);
                   1119:                err2 = errno;
                   1120:
                   1121:                if (empty2)
                   1122:                        kv2->hint->v.gh.notnum = true;
                   1123:                else if (err2 == 0) {
                   1124:                        kv2->hint->v.gh.d = d2;
                   1125:                        kv2->hint->v.gh.nan = is_nan(d2);
                   1126:                        kv2->hint->status = HS_INITIALIZED;
                   1127:                } else
                   1128:                        kv2->hint->status = HS_ERROR;
                   1129:
                   1130:                key2_read = true;
                   1131:        }
                   1132:
                   1133:        if (kv1->hint->status == HS_INITIALIZED &&
                   1134:            kv2->hint->status == HS_INITIALIZED) {
                   1135:                if (kv1->hint->v.gh.notnum)
                   1136:                        return kv2->hint->v.gh.notnum ? 0 : -1;
                   1137:                else if (kv2->hint->v.gh.notnum)
                   1138:                        return 1;
                   1139:
                   1140:                if (kv1->hint->v.gh.nan)
                   1141:                        return kv2->hint->v.gh.nan ?
                   1142:                            cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
                   1143:                else if (kv2->hint->v.gh.nan)
                   1144:                        return 1;
                   1145:
                   1146:                d1 = kv1->hint->v.gh.d;
                   1147:                d2 = kv2->hint->v.gh.d;
                   1148:
                   1149:                if (d1 < d2)
                   1150:                        return -1;
                   1151:                else if (d1 > d2)
                   1152:                        return 1;
                   1153:                else
                   1154:                        return 0;
                   1155:        }
                   1156:
                   1157:        if (!key1_read) {
                   1158:                errno = 0;
                   1159:                d1 = bwstod(kv1->k, &empty1);
                   1160:                err1 = errno;
                   1161:        }
                   1162:
                   1163:        if (!key2_read) {
                   1164:                errno = 0;
                   1165:                d2 = bwstod(kv2->k, &empty2);
                   1166:                err2 = errno;
                   1167:        }
                   1168:
                   1169:        /* Non-value case: */
                   1170:        if (empty1)
                   1171:                return empty2 ? 0 : -1;
                   1172:        else if (empty2)
                   1173:                return 1;
                   1174:
                   1175:        /* NAN case */
                   1176:        if (is_nan(d1))
                   1177:                return is_nan(d2) ? cmp_nans(d1, d2) : -1;
                   1178:        else if (is_nan(d2))
                   1179:                return 1;
                   1180:
                   1181:        /* Infinities */
                   1182:        if (err1 == ERANGE || err2 == ERANGE) {
                   1183:                /* Minus infinity case */
                   1184:                if (huge_minus(d1, err1)) {
                   1185:                        if (huge_minus(d2, err2)) {
                   1186:                                if (d1 == d2)
                   1187:                                        return 0;
                   1188:                                return d1 < d2 ? -1 : 1;
                   1189:                        } else
                   1190:                                return -1;
                   1191:
                   1192:                } else if (huge_minus(d2, err2)) {
                   1193:                        if (huge_minus(d1, err1)) {
                   1194:                                if (d1 == d2)
                   1195:                                        return 0;
                   1196:                                return d1 < d2 ? -1 : 1;
                   1197:                        } else
                   1198:                                return 1;
                   1199:                }
                   1200:
                   1201:                /* Plus infinity case */
                   1202:                if (huge_plus(d1, err1)) {
                   1203:                        if (huge_plus(d2, err2)) {
                   1204:                                if (d1 == d2)
                   1205:                                        return 0;
                   1206:                                return d1 < d2 ? -1 : 1;
                   1207:                        } else
                   1208:                                return 1;
                   1209:                } else if (huge_plus(d2, err2)) {
                   1210:                        if (huge_plus(d1, err1)) {
                   1211:                                if (d1 == d2)
                   1212:                                        return 0;
                   1213:                                return d1 < d2 ? -1 : 1;
                   1214:                        } else
                   1215:                                return -1;
                   1216:                }
                   1217:        }
                   1218:
                   1219:        if (d1 == d2)
                   1220:                return 0;
                   1221:        return d1 < d2 ? -1 : 1;
                   1222: }
                   1223:
                   1224: /*
                   1225:  * Implements month sort (-M).
                   1226:  */
                   1227: static int
                   1228: monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
                   1229: {
                   1230:        int val1, val2;
                   1231:        bool key1_read, key2_read;
                   1232:
                   1233:        val1 = val2 = 0;
                   1234:        key1_read = key2_read = false;
                   1235:
                   1236:        if (debug_sort) {
                   1237:                bwsprintf(stdout, kv1->k, "; k1=<", ">");
                   1238:                bwsprintf(stdout, kv2->k, "; k2=<", ">");
                   1239:        }
                   1240:
                   1241:        if (kv1->hint->status == HS_UNINITIALIZED) {
                   1242:                kv1->hint->v.Mh.m = bws_month_score(kv1->k);
                   1243:                key1_read = true;
                   1244:                kv1->hint->status = HS_INITIALIZED;
                   1245:        }
                   1246:
                   1247:        if (kv2->hint->status == HS_UNINITIALIZED) {
                   1248:                kv2->hint->v.Mh.m = bws_month_score(kv2->k);
                   1249:                key2_read = true;
                   1250:                kv2->hint->status = HS_INITIALIZED;
                   1251:        }
                   1252:
                   1253:        if (kv1->hint->status == HS_INITIALIZED) {
                   1254:                val1 = kv1->hint->v.Mh.m;
                   1255:                key1_read = true;
                   1256:        }
                   1257:
                   1258:        if (kv2->hint->status == HS_INITIALIZED) {
                   1259:                val2 = kv2->hint->v.Mh.m;
                   1260:                key2_read = true;
                   1261:        }
                   1262:
                   1263:        if (!key1_read)
                   1264:                val1 = bws_month_score(kv1->k);
                   1265:        if (!key2_read)
                   1266:                val2 = bws_month_score(kv2->k);
                   1267:
                   1268:        if (val1 == val2)
                   1269:                return 0;
                   1270:        return val1 < val2 ? -1 : 1;
                   1271: }