src/usr.bin/sort/coll.c - annotate

Return to coll.c CVS log
Up to [local] / src / usr.bin / sort
Annotation of src/usr.bin/sort/coll.c, Revision 1.12

1.12    ! schwarze    1: /*     $OpenBSD: coll.c,v 1.11 2015/12/11 21:41:51 mmcc Exp $  */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
                      5:  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29:
                     30: #include <sys/types.h>
                     31:
                     32: #include <errno.h>
                     33: #include <err.h>
                     34: #include <langinfo.h>
                     35: #include <limits.h>
                     36: #include <math.h>
                     37: #include <md5.h>
                     38: #include <stdlib.h>
                     39: #include <string.h>
                     40: #include <wchar.h>
                     41: #include <wctype.h>
                     42:
                     43: #include "coll.h"
                     44: #include "vsort.h"
                     45:
                     46: struct key_specs *keys;
                     47: size_t keys_num = 0;
                     48:
                     49: static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
                     50: static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
                     51: static int monthcoll(struct key_value*, struct key_value *, size_t offset);
                     52: static int numcoll(struct key_value*, struct key_value *, size_t offset);
                     53: static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
                     54: static int randomcoll(struct key_value*, struct key_value *, size_t offset);
                     55: static int versioncoll(struct key_value*, struct key_value *, size_t offset);
                     56:
                     57: /*
                     58:  * Allocate keys array
                     59:  */
                     60: struct keys_array *
                     61: keys_array_alloc(void)
                     62: {
                     63:        struct keys_array *ka;
                     64:        size_t sz;
                     65:
                     66:        sz = keys_array_size();
1.2       millert    67:        ka = sort_calloc(1, sz);
1.1       millert    68:
                     69:        return ka;
                     70: }
                     71:
                     72: /*
1.8       deraadt    73:  * Calculate whether we need key hint space
1.1       millert    74:  */
                     75: static size_t
                     76: key_hint_size(void)
                     77: {
                     78:        return need_hint ? sizeof(struct key_hint) : 0;
                     79: }
                     80:
                     81: /*
                     82:  * Calculate keys array size
                     83:  */
                     84: size_t
                     85: keys_array_size(void)
                     86: {
                     87:        return keys_num * (sizeof(struct key_value) + key_hint_size());
                     88: }
                     89:
                     90: /*
                     91:  * Clean data of keys array
                     92:  */
                     93: void
                     94: clean_keys_array(const struct bwstring *s, struct keys_array *ka)
                     95: {
                     96:        if (ka) {
                     97:                size_t i;
                     98:
                     99:                for (i = 0; i < keys_num; ++i)
                    100:                        if (ka->key[i].k && ka->key[i].k != s)
                    101:                                bwsfree(ka->key[i].k);
                    102:                memset(ka, 0, keys_array_size());
                    103:        }
                    104: }
                    105:
                    106: /*
                    107:  * Set value of a key in the keys set
                    108:  */
                    109: void
                    110: set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
                    111: {
                    112:        if (ka && keys_num > ind) {
                    113:                struct key_value *kv;
                    114:
                    115:                kv = &(ka->key[ind]);
                    116:
1.7       tobias    117:                if (kv->k != s)
1.1       millert   118:                        bwsfree(kv->k);
                    119:                kv->k = s;
                    120:        }
                    121: }
                    122:
                    123: /*
                    124:  * Initialize a sort list item
                    125:  */
                    126: struct sort_list_item *
                    127: sort_list_item_alloc(void)
                    128: {
                    129:        struct sort_list_item *si;
                    130:        size_t sz;
                    131:
                    132:        sz = sizeof(struct sort_list_item) + keys_array_size();
1.2       millert   133:        si = sort_calloc(1, sz);
1.1       millert   134:
                    135:        return si;
                    136: }
                    137:
                    138: size_t
                    139: sort_list_item_size(struct sort_list_item *si)
                    140: {
                    141:        size_t i, ret = 0;
                    142:
                    143:        if (si) {
                    144:                ret = sizeof(struct sort_list_item) + keys_array_size();
                    145:                if (si->str)
                    146:                        ret += bws_memsize(si->str);
                    147:                for (i = 0; i < keys_num; ++i) {
                    148:                        struct key_value *kv;
                    149:
                    150:                        kv = &(si->ka.key[i]);
                    151:
                    152:                        if (kv->k != si->str)
                    153:                                ret += bws_memsize(kv->k);
                    154:                }
                    155:        }
                    156:        return ret;
                    157: }
                    158:
                    159: /*
                    160:  * Calculate key for a sort list item
                    161:  */
                    162: static void
                    163: sort_list_item_make_key(struct sort_list_item *si)
                    164: {
                    165:        preproc(si->str, &(si->ka));
                    166: }
                    167:
                    168: /*
                    169:  * Set value of a sort list item.
                    170:  * Return combined string and keys memory size.
                    171:  */
                    172: void
                    173: sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
                    174: {
                    175:        if (si) {
                    176:                clean_keys_array(si->str, &(si->ka));
                    177:                if (si->str) {
                    178:                        if (si->str == str) {
                    179:                                /* we are trying to reset the same string */
                    180:                                return;
                    181:                        } else {
                    182:                                bwsfree(si->str);
                    183:                                si->str = NULL;
                    184:                        }
                    185:                }
                    186:                si->str = str;
                    187:                sort_list_item_make_key(si);
                    188:        }
                    189: }
                    190:
                    191: /*
                    192:  * De-allocate a sort list item object memory
                    193:  */
                    194: void
                    195: sort_list_item_clean(struct sort_list_item *si)
                    196: {
                    197:        if (si) {
                    198:                clean_keys_array(si->str, &(si->ka));
                    199:                if (si->str) {
                    200:                        bwsfree(si->str);
                    201:                        si->str = NULL;
                    202:                }
                    203:        }
                    204: }
                    205:
                    206: /*
                    207:  * Skip columns according to specs
                    208:  */
                    209: static size_t
                    210: skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
                    211:     bool skip_blanks, bool *empty_key)
                    212: {
                    213:        if (cols < 1)
                    214:                return BWSLEN(s) + 1;
                    215:
                    216:        if (skip_blanks)
                    217:                while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
                    218:                        ++start;
                    219:
                    220:        while (start < BWSLEN(s) && cols > 1) {
                    221:                --cols;
                    222:                ++start;
                    223:        }
                    224:
                    225:        if (start >= BWSLEN(s))
                    226:                *empty_key = true;
                    227:
                    228:        return start;
                    229: }
                    230:
                    231: /*
                    232:  * Skip fields according to specs
                    233:  */
                    234: static size_t
                    235: skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
                    236: {
                    237:        if (fields < 2) {
                    238:                if (BWSLEN(s) == 0)
                    239:                        *empty_field = true;
                    240:                return 0;
                    241:        } else if (!(sort_opts_vals.tflag)) {
                    242:                size_t cpos = 0;
                    243:                bool pb = true;
                    244:
                    245:                while (cpos < BWSLEN(s)) {
                    246:                        bool isblank;
                    247:
                    248:                        isblank = iswblank(BWS_GET(s, cpos));
                    249:
                    250:                        if (isblank && !pb) {
                    251:                                --fields;
                    252:                                if (fields <= 1)
                    253:                                        return cpos;
                    254:                        }
                    255:                        pb = isblank;
                    256:                        ++cpos;
                    257:                }
                    258:                if (fields > 1)
                    259:                        *empty_field = true;
                    260:                return cpos;
                    261:        } else {
                    262:                size_t cpos = 0;
                    263:
                    264:                while (cpos < BWSLEN(s)) {
                    265:                        if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
                    266:                                --fields;
                    267:                                if (fields <= 1)
                    268:                                        return cpos + 1;
                    269:                        }
                    270:                        ++cpos;
                    271:                }
                    272:                if (fields > 1)
                    273:                        *empty_field = true;
                    274:                return cpos;
                    275:        }
                    276: }
                    277:
                    278: /*
                    279:  * Find fields start
                    280:  */
                    281: static void
                    282: find_field_start(const struct bwstring *s, struct key_specs *ks,
                    283:     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
                    284: {
                    285:        *field_start = skip_fields_to_start(s, ks->f1, empty_field);
                    286:        if (!*empty_field)
                    287:                *key_start = skip_cols_to_start(s, ks->c1, *field_start,
                    288:                    ks->pos1b, empty_key);
                    289:        else
                    290:                *empty_key = true;
                    291: }
                    292:
                    293: /*
                    294:  * Find end key position
                    295:  */
                    296: static size_t
                    297: find_field_end(const struct bwstring *s, struct key_specs *ks)
                    298: {
                    299:        size_t f2, next_field_start, pos_end;
                    300:        bool empty_field, empty_key;
                    301:
                    302:        empty_field = false;
                    303:        empty_key = false;
                    304:        f2 = ks->f2;
                    305:
                    306:        if (f2 == 0)
                    307:                return BWSLEN(s) + 1;
                    308:        else {
                    309:                if (ks->c2 == 0) {
                    310:                        next_field_start = skip_fields_to_start(s, f2 + 1,
                    311:                            &empty_field);
                    312:                        if ((next_field_start > 0) && sort_opts_vals.tflag &&
                    313:                            ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
                    314:                            next_field_start - 1)))
                    315:                                --next_field_start;
                    316:                } else
                    317:                        next_field_start = skip_fields_to_start(s, f2,
                    318:                            &empty_field);
                    319:        }
                    320:
                    321:        if (empty_field || (next_field_start >= BWSLEN(s)))
                    322:                return BWSLEN(s) + 1;
                    323:
                    324:        if (ks->c2) {
                    325:                pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
                    326:                    ks->pos2b, &empty_key);
                    327:                if (pos_end < BWSLEN(s))
                    328:                        ++pos_end;
                    329:        } else
                    330:                pos_end = next_field_start;
                    331:
                    332:        return pos_end;
                    333: }
                    334:
                    335: /*
                    336:  * Cut a field according to the key specs
                    337:  */
                    338: static struct bwstring *
                    339: cut_field(const struct bwstring *s, struct key_specs *ks)
                    340: {
                    341:        struct bwstring *ret = NULL;
                    342:
                    343:        if (s && ks) {
                    344:                size_t field_start, key_end, key_start, sz;
                    345:                bool empty_field, empty_key;
                    346:
                    347:                field_start = 0;
                    348:                key_start = 0;
                    349:                empty_field = false;
                    350:                empty_key = false;
                    351:
                    352:                find_field_start(s, ks, &field_start, &key_start,
                    353:                    &empty_field, &empty_key);
                    354:
                    355:                if (empty_key)
                    356:                        sz = 0;
                    357:                else {
                    358:                        key_end = find_field_end(s, ks);
                    359:                        sz = (key_end < key_start) ? 0 : (key_end - key_start);
                    360:                }
                    361:
                    362:                ret = bwsalloc(sz);
                    363:                if (sz)
                    364:                        bwsnocpy(ret, s, key_start, sz);
                    365:        } else
                    366:                ret = bwsalloc(0);
                    367:
                    368:        return ret;
                    369: }
                    370:
                    371: /*
                    372:  * Preprocesses a line applying the necessary transformations
                    373:  * specified by command line options and returns the preprocessed
                    374:  * string, which can be used to compare.
                    375:  */
                    376: int
                    377: preproc(struct bwstring *s, struct keys_array *ka)
                    378: {
                    379:        if (sort_opts_vals.kflag) {
                    380:                size_t i;
                    381:                for (i = 0; i < keys_num; i++) {
                    382:                        struct bwstring *key;
                    383:                        struct key_specs *kspecs;
                    384:                        struct sort_mods *sm;
                    385:
                    386:                        kspecs = &(keys[i]);
                    387:                        key = cut_field(s, kspecs);
                    388:
                    389:                        sm = &(kspecs->sm);
                    390:                        if (sm->dflag)
                    391:                                key = dictionary_order(key);
                    392:                        else if (sm->iflag)
                    393:                                key = ignore_nonprinting(key);
                    394:                        if (sm->fflag || sm->Mflag)
                    395:                                key = ignore_case(key);
                    396:
                    397:                        set_key_on_keys_array(ka, key, i);
                    398:                }
                    399:        } else {
                    400:                struct bwstring *ret = NULL;
                    401:                struct sort_mods *sm = default_sort_mods;
                    402:
1.9       millert   403: #ifdef GNUSORT_COMPATIBILITY
1.1       millert   404:                if (sm->bflag) {
                    405:                        if (ret == NULL)
                    406:                                ret = bwsdup(s);
                    407:                        ret = ignore_leading_blanks(ret);
                    408:                }
1.9       millert   409: #endif
1.1       millert   410:                if (sm->dflag) {
                    411:                        if (ret == NULL)
                    412:                                ret = bwsdup(s);
                    413:                        ret = dictionary_order(ret);
                    414:                } else if (sm->iflag) {
                    415:                        if (ret == NULL)
                    416:                                ret = bwsdup(s);
                    417:                        ret = ignore_nonprinting(ret);
                    418:                }
                    419:                if (sm->fflag || sm->Mflag) {
                    420:                        if (ret == NULL)
                    421:                                ret = bwsdup(s);
                    422:                        ret = ignore_case(ret);
                    423:                }
                    424:                if (ret == NULL)
                    425:                        set_key_on_keys_array(ka, s, 0);
                    426:                else
                    427:                        set_key_on_keys_array(ka, ret, 0);
                    428:        }
                    429:
                    430:        return 0;
                    431: }
                    432:
                    433: cmpcoll_t
                    434: get_sort_func(struct sort_mods *sm)
                    435: {
                    436:        if (sm->nflag)
                    437:                return numcoll;
                    438:        else if (sm->hflag)
                    439:                return hnumcoll;
                    440:        else if (sm->gflag)
                    441:                return gnumcoll;
                    442:        else if (sm->Mflag)
                    443:                return monthcoll;
                    444:        else if (sm->Rflag)
                    445:                return randomcoll;
                    446:        else if (sm->Vflag)
                    447:                return versioncoll;
                    448:        else
                    449:                return wstrcoll;
                    450: }
                    451:
                    452: /*
                    453:  * Compares the given strings.  Returns a positive number if
                    454:  * the first precedes the second, a negative number if the second is
                    455:  * the preceding one, and zero if they are equal.  This function calls
                    456:  * the underlying collate functions, which done the actual comparison.
                    457:  */
                    458: int
                    459: key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
                    460: {
                    461:        struct sort_mods *sm;
                    462:        int res = 0;
                    463:        size_t i;
                    464:
                    465:        for (i = 0; i < keys_num; ++i) {
                    466:                sm = &(keys[i].sm);
                    467:
                    468:                if (sm->rflag)
                    469:                        res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
                    470:                else
                    471:                        res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
                    472:
                    473:                if (res)
                    474:                        break;
                    475:
                    476:                /* offset applies to only the first key */
                    477:                offset = 0;
                    478:        }
                    479:        return res;
                    480: }
                    481:
                    482: /*
                    483:  * Compare two strings.
                    484:  * Plain symbol-by-symbol comparison.
                    485:  */
                    486: int
                    487: top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
                    488: {
                    489:        if (default_sort_mods->rflag) {
                    490:                const struct bwstring *tmp;
                    491:
                    492:                tmp = s1;
                    493:                s1 = s2;
                    494:                s2 = tmp;
                    495:        }
                    496:
                    497:        return bwscoll(s1, s2, 0);
                    498: }
                    499:
                    500: /*
                    501:  * Compare a string and a sort list item, according to the sort specs.
                    502:  */
                    503: int
                    504: str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
                    505: {
                    506:        struct keys_array *ka1;
                    507:        int ret = 0;
                    508:
                    509:        ka1 = keys_array_alloc();
                    510:
                    511:        preproc(str1, ka1);
                    512:
                    513:        sort_list_item_make_key(*ss2);
                    514:
                    515:        if (debug_sort) {
                    516:                bwsprintf(stdout, str1, "; s1=<", ">");
                    517:                bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
                    518:        }
                    519:
                    520:        ret = key_coll(ka1, &((*ss2)->ka), 0);
                    521:
                    522:        if (debug_sort)
                    523:                printf("; cmp1=%d", ret);
                    524:
                    525:        clean_keys_array(str1, ka1);
                    526:        sort_free(ka1);
                    527:
                    528:        if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
                    529:                ret = top_level_str_coll(str1, ((*ss2)->str));
                    530:                if (debug_sort)
                    531:                        printf("; cmp2=%d", ret);
                    532:        }
                    533:
                    534:        if (debug_sort)
                    535:                putchar('\n');
                    536:
                    537:        return ret;
                    538: }
                    539:
                    540: /*
                    541:  * Compare two sort list items, according to the sort specs.
                    542:  */
                    543: int
                    544: list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
                    545:     size_t offset)
                    546: {
                    547:        int ret;
                    548:
                    549:        ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
                    550:
                    551:        if (debug_sort) {
                    552:                if (offset)
1.11      mmcc      553:                        printf("; offset=%zu", offset);
1.1       millert   554:                bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
                    555:                bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
                    556:                printf("; cmp1=%d\n", ret);
                    557:        }
                    558:
                    559:        if (ret)
                    560:                return ret;
                    561:
                    562:        if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
                    563:                ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
                    564:                if (debug_sort)
                    565:                        printf("; cmp2=%d\n", ret);
                    566:        }
                    567:
                    568:        return ret;
                    569: }
                    570:
                    571: /*
                    572:  * Compare two sort list items, according to the sort specs.
                    573:  */
                    574: int
                    575: list_coll(const void *ss1, const void *ss2)
                    576: {
                    577:        return list_coll_offset((struct sort_list_item **)ss1,
                    578:            (struct sort_list_item **)ss2, 0);
                    579: }
                    580:
1.8       deraadt   581: #define LSCDEF(N)                                                                                      \
                    582: static int                                                                                             \
1.1       millert   583: list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)                                        \
                    584: {                                                                                                      \
                    585:                                                                                                        \
                    586:        return list_coll_offset(ss1, ss2, N);                                                           \
                    587: }
                    588:
                    589: LSCDEF(0)
                    590: LSCDEF(1)
                    591: LSCDEF(2)
                    592: LSCDEF(3)
                    593: LSCDEF(4)
                    594: LSCDEF(5)
                    595: LSCDEF(6)
                    596: LSCDEF(7)
                    597: LSCDEF(8)
                    598: LSCDEF(9)
                    599: LSCDEF(10)
                    600: LSCDEF(11)
                    601: LSCDEF(12)
                    602: LSCDEF(13)
                    603: LSCDEF(14)
                    604: LSCDEF(15)
                    605: LSCDEF(16)
                    606: LSCDEF(17)
                    607: LSCDEF(18)
                    608: LSCDEF(19)
                    609: LSCDEF(20)
                    610:
                    611: listcoll_t
                    612: get_list_call_func(size_t offset)
                    613: {
                    614:        static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
                    615:            list_coll_2, list_coll_3, list_coll_4, list_coll_5,
                    616:            list_coll_6, list_coll_7, list_coll_8, list_coll_9,
                    617:            list_coll_10, list_coll_11, list_coll_12, list_coll_13,
                    618:            list_coll_14, list_coll_15, list_coll_16, list_coll_17,
                    619:            list_coll_18, list_coll_19, list_coll_20 };
                    620:
                    621:        if (offset <= 20)
                    622:                return lsarray[offset];
                    623:
                    624:        return list_coll_0;
                    625: }
                    626:
                    627: /*
                    628:  * Compare two sort list items, only by their original string.
                    629:  */
                    630: int
                    631: list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
                    632: {
                    633:        return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
                    634: }
                    635:
                    636: /*
                    637:  * Maximum size of a number in the string (before or after decimal point)
                    638:  */
                    639: #define MAX_NUM_SIZE (128)
                    640:
                    641: /*
                    642:  * Set suffix value
                    643:  */
                    644: static void
                    645: setsuffix(wchar_t c, unsigned char *si)
                    646: {
                    647:        switch (c){
                    648:        case L'k':
                    649:        case L'K':
                    650:                *si = 1;
                    651:                break;
                    652:        case L'M':
                    653:                *si = 2;
                    654:                break;
                    655:        case L'G':
                    656:                *si = 3;
                    657:                break;
                    658:        case L'T':
                    659:                *si = 4;
                    660:                break;
                    661:        case L'P':
                    662:                *si = 5;
                    663:                break;
                    664:        case L'E':
                    665:                *si = 6;
                    666:                break;
                    667:        case L'Z':
                    668:                *si = 7;
                    669:                break;
                    670:        case L'Y':
                    671:                *si = 8;
                    672:                break;
                    673:        default:
                    674:                *si = 0;
                    675:        };
                    676: }
                    677:
                    678: /*
                    679:  * Read string s and parse the string into a fixed-decimal-point number.
                    680:  * sign equals -1 if the number is negative (explicit plus is not allowed,
                    681:  * according to GNU sort's "info sort".
                    682:  * The number part before decimal point is in the smain, after the decimal
                    683:  * point is in sfrac, tail is the pointer to the remainder of the string.
                    684:  */
                    685: static int
                    686: read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
                    687: {
                    688:        bwstring_iterator s;
                    689:
                    690:        s = bws_begin(s0);
                    691:
                    692:        /* always end the fraction with zero, even if we have no fraction */
                    693:        sfrac[0] = 0;
                    694:
                    695:        while (iswblank(bws_get_iter_value(s)))
                    696:                s = bws_iterator_inc(s, 1);
                    697:
1.12    ! schwarze  698:        if (bws_get_iter_value(s) == L'-') {
1.1       millert   699:                *sign = -1;
                    700:                s = bws_iterator_inc(s, 1);
                    701:        }
                    702:
                    703:        // This is '0', not '\0', do not change this
                    704:        while (iswdigit(bws_get_iter_value(s)) &&
                    705:            (bws_get_iter_value(s) == L'0'))
                    706:                s = bws_iterator_inc(s, 1);
                    707:
                    708:        while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
                    709:                if (iswdigit(bws_get_iter_value(s))) {
                    710:                        smain[*main_len] = bws_get_iter_value(s);
                    711:                        s = bws_iterator_inc(s, 1);
                    712:                        *main_len += 1;
1.12    ! schwarze  713:                } else
1.1       millert   714:                        break;
                    715:        }
                    716:
                    717:        smain[*main_len] = 0;
                    718:
1.12    ! schwarze  719:        if (bws_get_iter_value(s) == L'.') {
1.1       millert   720:                s = bws_iterator_inc(s, 1);
                    721:                while (iswdigit(bws_get_iter_value(s)) &&
                    722:                    *frac_len < MAX_NUM_SIZE) {
                    723:                        sfrac[*frac_len] = bws_get_iter_value(s);
                    724:                        s = bws_iterator_inc(s, 1);
                    725:                        *frac_len += 1;
                    726:                }
                    727:                sfrac[*frac_len] = 0;
                    728:
                    729:                while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
                    730:                        --(*frac_len);
                    731:                        sfrac[*frac_len] = L'\0';
                    732:                }
                    733:        }
                    734:
                    735:        setsuffix(bws_get_iter_value(s), si);
                    736:
                    737:        if ((*main_len + *frac_len) == 0)
                    738:                *sign = 0;
                    739:
                    740:        return 0;
                    741: }
                    742:
                    743: /*
                    744:  * Implements string sort.
                    745:  */
                    746: static int
                    747: wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
                    748: {
                    749:
                    750:        if (debug_sort) {
                    751:                if (offset)
1.11      mmcc      752:                        printf("; offset=%zu\n", offset);
1.1       millert   753:                bwsprintf(stdout, kv1->k, "; k1=<", ">");
                    754:                printf("(%zu)", BWSLEN(kv1->k));
                    755:                bwsprintf(stdout, kv2->k, ", k2=<", ">");
                    756:                printf("(%zu)", BWSLEN(kv2->k));
                    757:        }
                    758:
                    759:        return bwscoll(kv1->k, kv2->k, offset);
                    760: }
                    761:
                    762: /*
                    763:  * Compare two suffixes
                    764:  */
                    765: static inline int
                    766: cmpsuffix(unsigned char si1, unsigned char si2)
                    767: {
                    768:        return (char)si1 - (char)si2;
                    769: }
                    770:
                    771: /*
                    772:  * Implements numeric sort for -n and -h.
                    773:  */
                    774: static int
                    775: numcoll_impl(struct key_value *kv1, struct key_value *kv2,
                    776:     size_t offset __unused, bool use_suffix)
                    777: {
                    778:        struct bwstring *s1, *s2;
                    779:        wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
                    780:        wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
                    781:        int cmp_res, sign1, sign2;
                    782:        size_t frac1, frac2, main1, main2;
                    783:        unsigned char SI1, SI2;
                    784:        bool e1, e2, key1_read, key2_read;
                    785:
                    786:        s1 = kv1->k;
                    787:        s2 = kv2->k;
                    788:        sign1 = sign2 = 0;
                    789:        main1 = main2 = 0;
                    790:        frac1 = frac2 = 0;
                    791:
                    792:        key1_read = key2_read = false;
                    793:
                    794:        if (debug_sort) {
                    795:                bwsprintf(stdout, s1, "; k1=<", ">");
                    796:                bwsprintf(stdout, s2, ", k2=<", ">");
                    797:        }
                    798:
                    799:        if (s1 == s2)
                    800:                return 0;
                    801:
                    802:        if (kv1->hint->status == HS_UNINITIALIZED) {
                    803:                /* read the number from the string */
                    804:                read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
                    805:                key1_read = true;
                    806:                kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
                    807:                if (main1 < 1 && frac1 < 1)
                    808:                        kv1->hint->v.nh.empty=true;
                    809:                kv1->hint->v.nh.si = SI1;
                    810:                kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
                    811:                    HS_INITIALIZED : HS_ERROR;
                    812:                kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
                    813:        }
                    814:
                    815:        if (kv2->hint->status == HS_UNINITIALIZED) {
                    816:                /* read the number from the string */
                    817:                read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
                    818:                key2_read = true;
                    819:                kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
                    820:                if (main2 < 1 && frac2 < 1)
                    821:                        kv2->hint->v.nh.empty=true;
                    822:                kv2->hint->v.nh.si = SI2;
                    823:                kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
                    824:                    HS_INITIALIZED : HS_ERROR;
                    825:                kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
                    826:        }
                    827:
                    828:        if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
                    829:            HS_INITIALIZED) {
                    830:                unsigned long long n1, n2;
                    831:                bool neg1, neg2;
                    832:
                    833:                e1 = kv1->hint->v.nh.empty;
                    834:                e2 = kv2->hint->v.nh.empty;
                    835:
                    836:                if (e1 && e2)
                    837:                        return 0;
                    838:
                    839:                neg1 = kv1->hint->v.nh.neg;
                    840:                neg2 = kv2->hint->v.nh.neg;
                    841:
                    842:                if (neg1 && !neg2)
                    843:                        return -1;
                    844:                if (neg2 && !neg1)
                    845:                        return 1;
                    846:
                    847:                if (e1)
                    848:                        return neg2 ? 1 : -1;
                    849:                else if (e2)
                    850:                        return neg1 ? -1 : 1;
                    851:
                    852:
                    853:                if (use_suffix) {
                    854:                        cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
                    855:                        if (cmp_res)
                    856:                                return neg1 ? -cmp_res : cmp_res;
                    857:                }
                    858:
                    859:                n1 = kv1->hint->v.nh.n1;
                    860:                n2 = kv2->hint->v.nh.n1;
                    861:                if (n1 < n2)
                    862:                        return neg1 ? 1 : -1;
                    863:                else if (n1 > n2)
                    864:                        return neg1 ? -1 : 1;
                    865:        }
                    866:
                    867:        /* read the numbers from the strings */
                    868:        if (!key1_read)
                    869:                read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
                    870:        if (!key2_read)
                    871:                read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
                    872:
                    873:        e1 = ((main1 + frac1) == 0);
                    874:        e2 = ((main2 + frac2) == 0);
                    875:
                    876:        if (e1 && e2)
                    877:                return 0;
                    878:
                    879:        /* we know the result if the signs are different */
                    880:        if (sign1 < 0 && sign2 >= 0)
                    881:                return -1;
                    882:        if (sign1 >= 0 && sign2 < 0)
                    883:                return 1;
                    884:
                    885:        if (e1)
                    886:                return (sign2 < 0) ? +1 : -1;
                    887:        else if (e2)
                    888:                return (sign1 < 0) ? -1 : 1;
                    889:
                    890:        if (use_suffix) {
                    891:                cmp_res = cmpsuffix(SI1, SI2);
                    892:                if (cmp_res)
                    893:                        return (sign1 < 0) ? -cmp_res : cmp_res;
                    894:        }
                    895:
                    896:        /* if both numbers are empty assume that the strings are equal */
                    897:        if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
                    898:                return 0;
                    899:
                    900:        /*
                    901:         * if the main part is of different size, we know the result
                    902:         * (because the leading zeros are removed)
                    903:         */
                    904:        if (main1 < main2)
                    905:                cmp_res = -1;
                    906:        else if (main1 > main2)
                    907:                cmp_res = +1;
                    908:        /* if the sizes are equal then simple non-collate string compare gives the correct result */
                    909:        else
                    910:                cmp_res = wcscmp(smain1, smain2);
                    911:
                    912:        /* check fraction */
                    913:        if (!cmp_res)
                    914:                cmp_res = wcscmp(sfrac1, sfrac2);
                    915:
                    916:        if (!cmp_res)
                    917:                return 0;
                    918:
                    919:        /* reverse result if the signs are negative */
                    920:        if (sign1 < 0 && sign2 < 0)
                    921:                cmp_res = -cmp_res;
                    922:
                    923:        return cmp_res;
                    924: }
                    925:
                    926: /*
                    927:  * Implements numeric sort (-n).
                    928:  */
                    929: static int
                    930: numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
                    931: {
                    932:        return numcoll_impl(kv1, kv2, offset, false);
                    933: }
                    934:
                    935: /*
                    936:  * Implements 'human' numeric sort (-h).
                    937:  */
                    938: static int
                    939: hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
                    940: {
                    941:        return numcoll_impl(kv1, kv2, offset, true);
                    942: }
                    943:
                    944: /*
                    945:  * Implements random sort (-R).
                    946:  */
                    947: static int
                    948: randomcoll(struct key_value *kv1, struct key_value *kv2,
                    949:     size_t offset __unused)
                    950: {
                    951:        struct bwstring *s1, *s2;
                    952:        MD5_CTX ctx1, ctx2;
                    953:        char *b1, *b2;
                    954:
                    955:        s1 = kv1->k;
                    956:        s2 = kv2->k;
                    957:
                    958:        if (debug_sort) {
                    959:                bwsprintf(stdout, s1, "; k1=<", ">");
                    960:                bwsprintf(stdout, s2, ", k2=<", ">");
                    961:        }
                    962:
                    963:        if (s1 == s2)
                    964:                return 0;
                    965:
                    966:        memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
                    967:        memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
                    968:
                    969:        MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
                    970:        MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
                    971:        b1 = MD5End(&ctx1, NULL);
                    972:        b2 = MD5End(&ctx2, NULL);
                    973:        if (b1 == NULL) {
                    974:                if (b2 == NULL)
                    975:                        return 0;
                    976:                else {
                    977:                        sort_free(b2);
                    978:                        return -1;
                    979:                }
                    980:        } else if (b2 == NULL) {
                    981:                sort_free(b1);
                    982:                return 1;
                    983:        } else {
                    984:                int cmp_res;
                    985:
                    986:                cmp_res = strcmp(b1, b2);
                    987:                sort_free(b1);
                    988:                sort_free(b2);
                    989:
                    990:                if (!cmp_res)
                    991:                        cmp_res = bwscoll(s1, s2, 0);
                    992:
                    993:                return cmp_res;
                    994:        }
                    995: }
                    996:
                    997: /*
                    998:  * Implements version sort (-V).
                    999:  */
                   1000: static int
                   1001: versioncoll(struct key_value *kv1, struct key_value *kv2,
                   1002:     size_t offset __unused)
                   1003: {
                   1004:        struct bwstring *s1, *s2;
                   1005:
                   1006:        s1 = kv1->k;
                   1007:        s2 = kv2->k;
                   1008:
                   1009:        if (debug_sort) {
                   1010:                bwsprintf(stdout, s1, "; k1=<", ">");
                   1011:                bwsprintf(stdout, s2, ", k2=<", ">");
                   1012:        }
                   1013:
                   1014:        if (s1 == s2)
                   1015:                return 0;
                   1016:
                   1017:        return vcmp(s1, s2);
                   1018: }
                   1019:
                   1020: /*
                   1021:  * Check for minus infinity
                   1022:  */
                   1023: static inline bool
                   1024: huge_minus(double d, int err1)
                   1025: {
                   1026:        if (err1 == ERANGE)
                   1027:                if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
                   1028:                        return 1;
                   1029:
                   1030:        return 0;
                   1031: }
                   1032:
                   1033: /*
                   1034:  * Check for plus infinity
                   1035:  */
                   1036: static inline bool
                   1037: huge_plus(double d, int err1)
                   1038: {
                   1039:        if (err1 == ERANGE)
                   1040:                if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
                   1041:                        return 1;
                   1042:
                   1043:        return 0;
                   1044: }
                   1045:
                   1046: /*
                   1047:  * Check whether a function is a NAN
                   1048:  */
                   1049: static bool
                   1050: is_nan(double d)
                   1051: {
1.3       miod     1052: #if defined(NAN)
1.1       millert  1053:        return (d == NAN || isnan(d));
1.3       miod     1054: #else
                   1055:        return (isnan(d));
                   1056: #endif
1.1       millert  1057: }
                   1058:
                   1059: /*
                   1060:  * Compare two NANs
                   1061:  */
                   1062: static int
                   1063: cmp_nans(double d1, double d2)
                   1064: {
                   1065:        if (d1 == d2)
                   1066:                return 0;
                   1067:        return d1 < d2 ? -1 : 1;
                   1068: }
                   1069:
                   1070: /*
                   1071:  * Implements general numeric sort (-g).
                   1072:  */
                   1073: static int
                   1074: gnumcoll(struct key_value *kv1, struct key_value *kv2,
                   1075:     size_t offset __unused)
                   1076: {
                   1077:        double d1, d2;
                   1078:        int err1, err2;
                   1079:        bool empty1, empty2, key1_read, key2_read;
                   1080:
                   1081:        d1 = d2 = 0;
                   1082:        err1 = err2 = 0;
                   1083:        key1_read = key2_read = false;
                   1084:
                   1085:        if (debug_sort) {
                   1086:                bwsprintf(stdout, kv1->k, "; k1=<", ">");
                   1087:                bwsprintf(stdout, kv2->k, "; k2=<", ">");
                   1088:        }
                   1089:
                   1090:        if (kv1->hint->status == HS_UNINITIALIZED) {
                   1091:                errno = 0;
                   1092:                d1 = bwstod(kv1->k, &empty1);
                   1093:                err1 = errno;
                   1094:
1.10      millert  1095:                if (empty1) {
1.1       millert  1096:                        kv1->hint->v.gh.notnum = true;
1.10      millert  1097:                        kv1->hint->status = HS_INITIALIZED;
                   1098:                } else if (err1 == 0) {
1.1       millert  1099:                        kv1->hint->v.gh.d = d1;
                   1100:                        kv1->hint->v.gh.nan = is_nan(d1);
                   1101:                        kv1->hint->status = HS_INITIALIZED;
                   1102:                } else
                   1103:                        kv1->hint->status = HS_ERROR;
                   1104:
                   1105:                key1_read = true;
                   1106:        }
                   1107:
                   1108:        if (kv2->hint->status == HS_UNINITIALIZED) {
                   1109:                errno = 0;
                   1110:                d2 = bwstod(kv2->k, &empty2);
                   1111:                err2 = errno;
                   1112:
1.10      millert  1113:                if (empty2) {
1.1       millert  1114:                        kv2->hint->v.gh.notnum = true;
1.10      millert  1115:                        kv2->hint->status = HS_INITIALIZED;
                   1116:                } else if (err2 == 0) {
1.1       millert  1117:                        kv2->hint->v.gh.d = d2;
                   1118:                        kv2->hint->v.gh.nan = is_nan(d2);
                   1119:                        kv2->hint->status = HS_INITIALIZED;
                   1120:                } else
                   1121:                        kv2->hint->status = HS_ERROR;
                   1122:
                   1123:                key2_read = true;
                   1124:        }
                   1125:
                   1126:        if (kv1->hint->status == HS_INITIALIZED &&
                   1127:            kv2->hint->status == HS_INITIALIZED) {
1.10      millert  1128: #ifdef GNUSORT_COMPATIBILITY
1.1       millert  1129:                if (kv1->hint->v.gh.notnum)
                   1130:                        return kv2->hint->v.gh.notnum ? 0 : -1;
                   1131:                else if (kv2->hint->v.gh.notnum)
                   1132:                        return 1;
1.10      millert  1133: #else
                   1134:                if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum)
                   1135:                        return 0;
                   1136: #endif
1.1       millert  1137:
                   1138:                if (kv1->hint->v.gh.nan)
                   1139:                        return kv2->hint->v.gh.nan ?
                   1140:                            cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
                   1141:                else if (kv2->hint->v.gh.nan)
                   1142:                        return 1;
                   1143:
                   1144:                d1 = kv1->hint->v.gh.d;
                   1145:                d2 = kv2->hint->v.gh.d;
                   1146:
                   1147:                if (d1 < d2)
                   1148:                        return -1;
                   1149:                else if (d1 > d2)
                   1150:                        return 1;
                   1151:                else
                   1152:                        return 0;
                   1153:        }
                   1154:
                   1155:        if (!key1_read) {
                   1156:                errno = 0;
                   1157:                d1 = bwstod(kv1->k, &empty1);
                   1158:                err1 = errno;
                   1159:        }
                   1160:
                   1161:        if (!key2_read) {
                   1162:                errno = 0;
                   1163:                d2 = bwstod(kv2->k, &empty2);
                   1164:                err2 = errno;
                   1165:        }
                   1166:
1.10      millert  1167:        /* Non-value case */
                   1168: #ifdef GNUSORT_COMPATIBILITY
1.1       millert  1169:        if (empty1)
                   1170:                return empty2 ? 0 : -1;
                   1171:        else if (empty2)
                   1172:                return 1;
1.10      millert  1173: #else
                   1174:        if (empty1 && empty2)
                   1175:                return 0;
                   1176: #endif
1.1       millert  1177:
                   1178:        /* NAN case */
                   1179:        if (is_nan(d1))
                   1180:                return is_nan(d2) ? cmp_nans(d1, d2) : -1;
                   1181:        else if (is_nan(d2))
                   1182:                return 1;
                   1183:
                   1184:        /* Infinities */
                   1185:        if (err1 == ERANGE || err2 == ERANGE) {
                   1186:                /* Minus infinity case */
                   1187:                if (huge_minus(d1, err1)) {
                   1188:                        if (huge_minus(d2, err2)) {
                   1189:                                if (d1 == d2)
                   1190:                                        return 0;
                   1191:                                return d1 < d2 ? -1 : 1;
                   1192:                        } else
                   1193:                                return -1;
                   1194:
                   1195:                } else if (huge_minus(d2, err2)) {
                   1196:                        if (huge_minus(d1, err1)) {
                   1197:                                if (d1 == d2)
                   1198:                                        return 0;
                   1199:                                return d1 < d2 ? -1 : 1;
                   1200:                        } else
                   1201:                                return 1;
                   1202:                }
                   1203:
                   1204:                /* Plus infinity case */
                   1205:                if (huge_plus(d1, err1)) {
                   1206:                        if (huge_plus(d2, err2)) {
                   1207:                                if (d1 == d2)
                   1208:                                        return 0;
                   1209:                                return d1 < d2 ? -1 : 1;
                   1210:                        } else
                   1211:                                return 1;
                   1212:                } else if (huge_plus(d2, err2)) {
                   1213:                        if (huge_plus(d1, err1)) {
                   1214:                                if (d1 == d2)
                   1215:                                        return 0;
                   1216:                                return d1 < d2 ? -1 : 1;
                   1217:                        } else
                   1218:                                return -1;
                   1219:                }
                   1220:        }
                   1221:
                   1222:        if (d1 == d2)
                   1223:                return 0;
                   1224:        return d1 < d2 ? -1 : 1;
                   1225: }
                   1226:
                   1227: /*
                   1228:  * Implements month sort (-M).
                   1229:  */
                   1230: static int
                   1231: monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
                   1232: {
                   1233:        int val1, val2;
                   1234:        bool key1_read, key2_read;
                   1235:
                   1236:        val1 = val2 = 0;
                   1237:        key1_read = key2_read = false;
                   1238:
                   1239:        if (debug_sort) {
                   1240:                bwsprintf(stdout, kv1->k, "; k1=<", ">");
                   1241:                bwsprintf(stdout, kv2->k, "; k2=<", ">");
                   1242:        }
                   1243:
                   1244:        if (kv1->hint->status == HS_UNINITIALIZED) {
                   1245:                kv1->hint->v.Mh.m = bws_month_score(kv1->k);
                   1246:                key1_read = true;
                   1247:                kv1->hint->status = HS_INITIALIZED;
                   1248:        }
                   1249:
                   1250:        if (kv2->hint->status == HS_UNINITIALIZED) {
                   1251:                kv2->hint->v.Mh.m = bws_month_score(kv2->k);
                   1252:                key2_read = true;
                   1253:                kv2->hint->status = HS_INITIALIZED;
                   1254:        }
                   1255:
                   1256:        if (kv1->hint->status == HS_INITIALIZED) {
                   1257:                val1 = kv1->hint->v.Mh.m;
                   1258:                key1_read = true;
                   1259:        }
                   1260:
                   1261:        if (kv2->hint->status == HS_INITIALIZED) {
                   1262:                val2 = kv2->hint->v.Mh.m;
                   1263:                key2_read = true;
                   1264:        }
                   1265:
                   1266:        if (!key1_read)
                   1267:                val1 = bws_month_score(kv1->k);
                   1268:        if (!key2_read)
                   1269:                val2 = bws_month_score(kv2->k);
                   1270:
                   1271:        if (val1 == val2)
                   1272:                return 0;
                   1273:        return val1 < val2 ? -1 : 1;
                   1274: }