src/usr.bin/sort/bwstring.c - annotate

Return to bwstring.c CVS log
Up to [local] / src / usr.bin / sort
Annotation of src/usr.bin/sort/bwstring.c, Revision 1.9

1.9     ! schwarze    1: /*     $OpenBSD: bwstring.c,v 1.8 2019/05/15 09:07:46 schwarze Exp $   */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
                      5:  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29:
                     30: #include <ctype.h>
                     31: #include <errno.h>
                     32: #include <err.h>
                     33: #include <langinfo.h>
                     34: #include <math.h>
                     35: #include <stdlib.h>
                     36: #include <string.h>
                     37: #include <wchar.h>
                     38: #include <wctype.h>
                     39:
                     40: #include "bwstring.h"
                     41: #include "sort.h"
                     42:
                     43: static wchar_t **wmonths;
                     44: static char **cmonths;
                     45:
                     46: /* initialise months */
                     47:
                     48: void
                     49: initialise_months(void)
                     50: {
                     51:        const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
                     52:            ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
                     53:            ABMON_11, ABMON_12 };
                     54:        char *tmp;
                     55:        size_t len;
                     56:
                     57:        if (sort_mb_cur_max == 1) {
                     58:                if (cmonths == NULL) {
                     59:                        char *m;
                     60:                        unsigned int j;
                     61:                        int i;
                     62:
                     63:                        cmonths = sort_malloc(sizeof(char *) * 12);
                     64:                        for (i = 0; i < 12; i++) {
                     65:                                cmonths[i] = NULL;
                     66:                                tmp = nl_langinfo(item[i]);
                     67:                                if (debug_sort)
                     68:                                        printf("month[%d]=%s\n", i, tmp);
1.3       millert    69:                                if (*tmp == '\0')
1.1       millert    70:                                        continue;
1.3       millert    71:                                m = sort_strdup(tmp);
1.4       millert    72:                                len = strlen(tmp);
1.1       millert    73:                                for (j = 0; j < len; j++)
                     74:                                        m[j] = toupper(m[j]);
                     75:                                cmonths[i] = m;
                     76:                        }
                     77:                }
                     78:        } else {
                     79:                if (wmonths == NULL) {
                     80:                        unsigned int j;
                     81:                        wchar_t *m;
                     82:                        int i;
                     83:
                     84:                        wmonths = sort_malloc(sizeof(wchar_t *) * 12);
                     85:                        for (i = 0; i < 12; i++) {
                     86:                                wmonths[i] = NULL;
                     87:                                tmp = nl_langinfo(item[i]);
                     88:                                if (debug_sort)
                     89:                                        printf("month[%d]=%s\n", i, tmp);
1.3       millert    90:                                if (*tmp == '\0')
                     91:                                        continue;
1.1       millert    92:                                len = strlen(tmp);
1.7       millert    93:                                m = sort_reallocarray(NULL, len + 1,
                     94:                                    sizeof(wchar_t));
1.3       millert    95:                                if (mbstowcs(m, tmp, len) == (size_t)-1) {
                     96:                                        sort_free(m);
1.1       millert    97:                                        continue;
1.3       millert    98:                                }
1.1       millert    99:                                m[len] = L'\0';
                    100:                                for (j = 0; j < len; j++)
                    101:                                        m[j] = towupper(m[j]);
                    102:                                wmonths[i] = m;
                    103:                        }
                    104:                }
                    105:        }
                    106: }
                    107:
                    108: /*
                    109:  * Compare two wide-character strings
                    110:  */
                    111: static int
                    112: wide_str_coll(const wchar_t *s1, const wchar_t *s2)
                    113: {
                    114:        int ret = 0;
                    115:
                    116:        errno = 0;
                    117:        ret = wcscoll(s1, s2);
                    118:        if (errno == EILSEQ) {
                    119:                errno = 0;
                    120:                ret = wcscmp(s1, s2);
                    121:                if (errno != 0) {
                    122:                        size_t i;
                    123:                        for (i = 0; ; ++i) {
                    124:                                wchar_t c1 = s1[i];
                    125:                                wchar_t c2 = s2[i];
                    126:                                if (c1 == L'\0')
                    127:                                        return (c2 == L'\0') ? 0 : -1;
                    128:                                if (c2 == L'\0')
                    129:                                        return 1;
                    130:                                if (c1 == c2)
                    131:                                        continue;
                    132:                                return (int)c1 - (int)c2;
                    133:                        }
                    134:                }
                    135:        }
                    136:        return ret;
                    137: }
                    138:
                    139: /* counterparts of wcs functions */
                    140:
                    141: void
                    142: bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
                    143: {
                    144:        if (sort_mb_cur_max == 1)
                    145:                fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
                    146:        else
                    147:                fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
                    148: }
                    149:
                    150: const void *
                    151: bwsrawdata(const struct bwstring *bws)
                    152: {
                    153:        return &(bws->data);
                    154: }
                    155:
                    156: size_t
                    157: bwsrawlen(const struct bwstring *bws)
                    158: {
                    159:        return (sort_mb_cur_max == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len);
                    160: }
                    161:
                    162: size_t
                    163: bws_memsize(const struct bwstring *bws)
                    164: {
                    165:        return (sort_mb_cur_max == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
                    166:            (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring));
                    167: }
                    168:
                    169: void
                    170: bws_setlen(struct bwstring *bws, size_t newlen)
                    171: {
                    172:        if (bws && newlen != bws->len && newlen <= bws->len) {
                    173:                bws->len = newlen;
                    174:                if (sort_mb_cur_max == 1)
                    175:                        bws->data.cstr[newlen] = '\0';
                    176:                else
                    177:                        bws->data.wstr[newlen] = L'\0';
                    178:        }
                    179: }
                    180:
                    181: /*
                    182:  * Allocate a new binary string of specified size
                    183:  */
                    184: struct bwstring *
                    185: bwsalloc(size_t sz)
                    186: {
                    187:        struct bwstring *ret;
                    188:
                    189:        if (sort_mb_cur_max == 1) {
                    190:                ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
                    191:                ret->data.cstr[sz] = '\0';
                    192:        } else {
                    193:                ret = sort_malloc(sizeof(struct bwstring) +
                    194:                    SIZEOF_WCHAR_STRING(sz + 1));
                    195:                ret->data.wstr[sz] = L'\0';
                    196:        }
                    197:        ret->len = sz;
                    198:
                    199:        return ret;
                    200: }
                    201:
                    202: /*
                    203:  * Create a copy of binary string.
                    204:  * New string size equals the length of the old string.
                    205:  */
                    206: struct bwstring *
                    207: bwsdup(const struct bwstring *s)
                    208: {
                    209:        struct bwstring *ret;
                    210:
                    211:        if (s == NULL)
                    212:                return NULL;
                    213:
                    214:        ret = bwsalloc(s->len);
                    215:
                    216:        if (sort_mb_cur_max == 1)
                    217:                memcpy(ret->data.cstr, s->data.cstr, s->len);
                    218:        else
                    219:                memcpy(ret->data.wstr, s->data.wstr,
                    220:                    SIZEOF_WCHAR_STRING(s->len));
                    221:
                    222:        return ret;
                    223: }
                    224:
                    225: /*
1.5       millert   226:  * Create a new binary string from a wide character buffer.
1.1       millert   227:  */
                    228: struct bwstring *
                    229: bwssbdup(const wchar_t *str, size_t len)
                    230: {
                    231:        if (str == NULL)
                    232:                return (len == 0) ? bwsalloc(0) : NULL;
                    233:        else {
                    234:                struct bwstring *ret;
                    235:                size_t i;
                    236:
                    237:                ret = bwsalloc(len);
                    238:
                    239:                if (sort_mb_cur_max == 1)
                    240:                        for (i = 0; i < len; ++i)
                    241:                                ret->data.cstr[i] = (unsigned char) str[i];
                    242:                else
                    243:                        memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len));
                    244:
                    245:                return ret;
                    246:        }
                    247: }
                    248:
                    249: /*
                    250:  * Create a new binary string from a raw binary buffer.
                    251:  */
                    252: struct bwstring *
                    253: bwscsbdup(const unsigned char *str, size_t len)
                    254: {
                    255:        struct bwstring *ret;
                    256:
                    257:        ret = bwsalloc(len);
                    258:
                    259:        if (str) {
                    260:                if (sort_mb_cur_max == 1)
                    261:                        memcpy(ret->data.cstr, str, len);
                    262:                else {
                    263:                        mbstate_t mbs;
                    264:                        const char *s;
                    265:                        size_t charlen, chars, cptr;
                    266:
                    267:                        chars = 0;
                    268:                        cptr = 0;
                    269:                        s = (const char *) str;
                    270:
                    271:                        memset(&mbs, 0, sizeof(mbs));
                    272:
                    273:                        while (cptr < len) {
                    274:                                size_t n = sort_mb_cur_max;
                    275:
                    276:                                if (n > len - cptr)
                    277:                                        n = len - cptr;
                    278:                                charlen = mbrlen(s + cptr, n, &mbs);
                    279:                                switch (charlen) {
                    280:                                case 0:
                    281:                                        /* FALLTHROUGH */
                    282:                                case (size_t) -1:
                    283:                                        /* FALLTHROUGH */
                    284:                                case (size_t) -2:
                    285:                                        ret->data.wstr[chars++] =
                    286:                                            (unsigned char) s[cptr];
                    287:                                        ++cptr;
                    288:                                        break;
                    289:                                default:
                    290:                                        n = mbrtowc(ret->data.wstr + (chars++),
                    291:                                            s + cptr, charlen, &mbs);
                    292:                                        if ((n == (size_t)-1) || (n == (size_t)-2))
                    293:                                                /* NOTREACHED */
                    294:                                                err(2, "mbrtowc error");
                    295:                                        cptr += charlen;
                    296:                                };
                    297:                        }
                    298:
                    299:                        ret->len = chars;
                    300:                        ret->data.wstr[ret->len] = L'\0';
                    301:                }
                    302:        }
                    303:        return ret;
                    304: }
                    305:
                    306: /*
                    307:  * De-allocate object memory
                    308:  */
                    309: void
                    310: bwsfree(struct bwstring *s)
                    311: {
                    312:        sort_free(s);
                    313: }
                    314:
                    315: /*
                    316:  * Copy content of src binary string to dst.
                    317:  * If the capacity of the dst string is not sufficient,
                    318:  * then the data is truncated.
                    319:  */
                    320: size_t
                    321: bwscpy(struct bwstring *dst, const struct bwstring *src)
                    322: {
                    323:        size_t nums = src->len;
                    324:
                    325:        if (nums > dst->len)
                    326:                nums = dst->len;
                    327:        dst->len = nums;
                    328:
                    329:        if (sort_mb_cur_max == 1) {
                    330:                memcpy(dst->data.cstr, src->data.cstr, nums);
                    331:                dst->data.cstr[dst->len] = '\0';
                    332:        } else {
                    333:                memcpy(dst->data.wstr, src->data.wstr,
                    334:                    SIZEOF_WCHAR_STRING(nums + 1));
                    335:                dst->data.wstr[dst->len] = L'\0';
                    336:        }
                    337:
                    338:        return nums;
                    339: }
                    340:
                    341: /*
                    342:  * Copy content of src binary string to dst,
                    343:  * with specified number of symbols to be copied.
                    344:  * If the capacity of the dst string is not sufficient,
                    345:  * then the data is truncated.
                    346:  */
                    347: struct bwstring *
                    348: bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
                    349: {
                    350:        size_t nums = src->len;
                    351:
                    352:        if (nums > dst->len)
                    353:                nums = dst->len;
                    354:        if (nums > size)
                    355:                nums = size;
                    356:        dst->len = nums;
                    357:
                    358:        if (sort_mb_cur_max == 1) {
                    359:                memcpy(dst->data.cstr, src->data.cstr, nums);
                    360:                dst->data.cstr[dst->len] = '\0';
                    361:        } else {
                    362:                memcpy(dst->data.wstr, src->data.wstr,
                    363:                    SIZEOF_WCHAR_STRING(nums + 1));
                    364:                dst->data.wstr[dst->len] = L'\0';
                    365:        }
                    366:
                    367:        return dst;
                    368: }
                    369:
                    370: /*
                    371:  * Copy content of src binary string to dst,
                    372:  * with specified number of symbols to be copied.
                    373:  * An offset value can be specified, from the start of src string.
                    374:  * If the capacity of the dst string is not sufficient,
                    375:  * then the data is truncated.
                    376:  */
                    377: struct bwstring *
                    378: bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
                    379:     size_t size)
                    380: {
                    381:        if (offset >= src->len) {
                    382:                dst->data.wstr[0] = 0;
                    383:                dst->len = 0;
                    384:        } else {
                    385:                size_t nums = src->len - offset;
                    386:
                    387:                if (nums > dst->len)
                    388:                        nums = dst->len;
                    389:                if (nums > size)
                    390:                        nums = size;
                    391:                dst->len = nums;
                    392:                if (sort_mb_cur_max == 1) {
                    393:                        memcpy(dst->data.cstr, src->data.cstr + offset,
                    394:                            (nums));
                    395:                        dst->data.cstr[dst->len] = '\0';
                    396:                } else {
                    397:                        memcpy(dst->data.wstr, src->data.wstr + offset,
                    398:                            SIZEOF_WCHAR_STRING(nums));
                    399:                        dst->data.wstr[dst->len] = L'\0';
                    400:                }
                    401:        }
                    402:        return dst;
                    403: }
                    404:
                    405: /*
                    406:  * Write binary string to the file.
                    407:  * The output is ended either with '\n' (nl == true)
                    408:  * or '\0' (nl == false).
                    409:  */
                    410: size_t
                    411: bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
                    412: {
                    413:        if (sort_mb_cur_max == 1) {
                    414:                size_t len = bws->len;
                    415:
                    416:                if (!zero_ended) {
                    417:                        bws->data.cstr[len] = '\n';
                    418:
                    419:                        if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
                    420:                                err(2, NULL);
                    421:
                    422:                        bws->data.cstr[len] = '\0';
                    423:                } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
                    424:                        err(2, NULL);
                    425:
                    426:                return len + 1;
                    427:
                    428:        } else {
                    429:                wchar_t eols;
                    430:                size_t printed = 0;
                    431:
                    432:                eols = zero_ended ? btowc('\0') : btowc('\n');
                    433:
                    434:                while (printed < BWSLEN(bws)) {
                    435:                        const wchar_t *s = bws->data.wstr + printed;
                    436:
                    437:                        if (*s == L'\0') {
                    438:                                int nums;
                    439:
                    440:                                nums = fwprintf(f, L"%lc", *s);
                    441:
                    442:                                if (nums != 1)
                    443:                                        err(2, NULL);
                    444:                                ++printed;
                    445:                        } else {
                    446:                                int nums;
                    447:
                    448:                                nums = fwprintf(f, L"%ls", s);
                    449:
                    450:                                if (nums < 1)
                    451:                                        err(2, NULL);
                    452:                                printed += nums;
                    453:                        }
                    454:                }
                    455:                fwprintf(f, L"%lc", eols);
                    456:                return printed + 1;
                    457:        }
                    458: }
                    459:
                    460: /*
                    461:  * Allocate and read a binary string from file.
                    462:  * The strings are nl-ended or zero-ended, depending on the sort setting.
                    463:  */
                    464: struct bwstring *
                    465: bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
                    466: {
                    467:        wint_t eols;
                    468:
                    469:        eols = zero_ended ? btowc('\0') : btowc('\n');
                    470:
                    471:        if (!zero_ended && (sort_mb_cur_max > 1)) {
                    472:                wchar_t *ret;
                    473:
                    474:                ret = fgetwln(f, len);
                    475:
                    476:                if (ret == NULL) {
                    477:                        if (!feof(f))
                    478:                                err(2, NULL);
                    479:                        return NULL;
                    480:                }
                    481:                if (*len > 0) {
                    482:                        if (ret[*len - 1] == (wchar_t)eols)
                    483:                                --(*len);
                    484:                }
                    485:                return bwssbdup(ret, *len);
                    486:
                    487:        } else if (!zero_ended && (sort_mb_cur_max == 1)) {
                    488:                char *ret;
                    489:
                    490:                ret = fgetln(f, len);
                    491:
                    492:                if (ret == NULL) {
                    493:                        if (!feof(f))
                    494:                                err(2, NULL);
                    495:                        return NULL;
                    496:                }
                    497:                if (*len > 0) {
                    498:                        if (ret[*len - 1] == '\n')
                    499:                                --(*len);
                    500:                }
                    501:                return bwscsbdup((unsigned char *)ret, *len);
                    502:
                    503:        } else {
                    504:                *len = 0;
                    505:
                    506:                if (feof(f))
                    507:                        return NULL;
                    508:
                    509:                if (2 >= rb->fgetwln_z_buffer_size) {
                    510:                        rb->fgetwln_z_buffer_size += 256;
                    511:                        rb->fgetwln_z_buffer =
                    512:                            sort_reallocarray(rb->fgetwln_z_buffer,
                    513:                            rb->fgetwln_z_buffer_size, sizeof(wchar_t));
                    514:                }
                    515:                rb->fgetwln_z_buffer[*len] = 0;
                    516:
1.6       millert   517:                if (sort_mb_cur_max == 1) {
1.1       millert   518:                        while (!feof(f)) {
                    519:                                int c;
                    520:
                    521:                                c = fgetc(f);
                    522:
                    523:                                if (c == EOF) {
                    524:                                        if (*len == 0)
                    525:                                                return NULL;
                    526:                                        goto line_read_done;
                    527:                                }
                    528:                                if (c == eols)
                    529:                                        goto line_read_done;
                    530:
                    531:                                if (*len + 1 >= rb->fgetwln_z_buffer_size) {
                    532:                                        rb->fgetwln_z_buffer_size += 256;
                    533:                                        rb->fgetwln_z_buffer =
                    534:                                            sort_reallocarray(rb->fgetwln_z_buffer,
                    535:                                            rb->fgetwln_z_buffer_size, sizeof(wchar_t));
                    536:                                }
                    537:
                    538:                                rb->fgetwln_z_buffer[*len] = c;
                    539:                                rb->fgetwln_z_buffer[++(*len)] = 0;
                    540:                        }
1.6       millert   541:                } else {
1.1       millert   542:                        while (!feof(f)) {
                    543:                                wint_t c = 0;
                    544:
                    545:                                c = fgetwc(f);
                    546:
                    547:                                if (c == WEOF) {
                    548:                                        if (*len == 0)
                    549:                                                return NULL;
                    550:                                        goto line_read_done;
                    551:                                }
                    552:                                if (c == eols)
                    553:                                        goto line_read_done;
                    554:
                    555:                                if (*len + 1 >= rb->fgetwln_z_buffer_size) {
                    556:                                        rb->fgetwln_z_buffer_size += 256;
                    557:                                        rb->fgetwln_z_buffer =
                    558:                                            sort_reallocarray(rb->fgetwln_z_buffer,
                    559:                                            rb->fgetwln_z_buffer_size, sizeof(wchar_t));
                    560:                                }
                    561:
                    562:                                rb->fgetwln_z_buffer[*len] = c;
                    563:                                rb->fgetwln_z_buffer[++(*len)] = 0;
                    564:                        }
1.6       millert   565:                }
1.1       millert   566:
                    567: line_read_done:
                    568:                /* we do not count the last 0 */
                    569:                return bwssbdup(rb->fgetwln_z_buffer, *len);
                    570:        }
                    571: }
                    572:
                    573: int
                    574: bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
                    575:     size_t offset, size_t len)
                    576: {
                    577:        size_t cmp_len, len1, len2;
                    578:        int res = 0;
                    579:
                    580:        len1 = bws1->len;
                    581:        len2 = bws2->len;
                    582:
                    583:        if (len1 <= offset) {
                    584:                return (len2 <= offset) ? 0 : -1;
                    585:        } else {
                    586:                if (len2 <= offset)
                    587:                        return 1;
                    588:                else {
                    589:                        len1 -= offset;
                    590:                        len2 -= offset;
                    591:
                    592:                        cmp_len = len1;
                    593:
                    594:                        if (len2 < cmp_len)
                    595:                                cmp_len = len2;
                    596:
                    597:                        if (len < cmp_len)
                    598:                                cmp_len = len;
                    599:
                    600:                        if (sort_mb_cur_max == 1) {
                    601:                                const unsigned char *s1, *s2;
                    602:
                    603:                                s1 = bws1->data.cstr + offset;
                    604:                                s2 = bws2->data.cstr + offset;
                    605:
                    606:                                res = memcmp(s1, s2, cmp_len);
                    607:
                    608:                        } else {
                    609:                                const wchar_t *s1, *s2;
                    610:
                    611:                                s1 = bws1->data.wstr + offset;
                    612:                                s2 = bws2->data.wstr + offset;
                    613:
                    614:                                res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
                    615:                        }
                    616:                }
                    617:        }
                    618:
                    619:        if (res == 0) {
                    620:                if (len1 < cmp_len && len1 < len2)
                    621:                        res = -1;
                    622:                else if (len2 < cmp_len && len2 < len1)
                    623:                        res = +1;
                    624:        }
                    625:
                    626:        return res;
                    627: }
                    628:
                    629: int
                    630: bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
                    631: {
                    632:        size_t len1, len2, cmp_len;
                    633:        int res;
                    634:
                    635:        len1 = bws1->len;
                    636:        len2 = bws2->len;
                    637:
                    638:        len1 -= offset;
                    639:        len2 -= offset;
                    640:
                    641:        cmp_len = len1;
                    642:
                    643:        if (len2 < cmp_len)
                    644:                cmp_len = len2;
                    645:
                    646:        res = bwsncmp(bws1, bws2, offset, cmp_len);
                    647:
                    648:        if (res == 0) {
                    649:                if (len1 < len2)
                    650:                        res = -1;
                    651:                else if (len2 < len1)
                    652:                        res = +1;
                    653:        }
                    654:
                    655:        return res;
                    656: }
                    657:
                    658: int
                    659: bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
                    660: {
                    661:        wchar_t c1, c2;
                    662:        size_t i = 0;
                    663:
                    664:        for (i = 0; i < len; ++i) {
                    665:                c1 = bws_get_iter_value(iter1);
                    666:                c2 = bws_get_iter_value(iter2);
                    667:                if (c1 != c2)
                    668:                        return c1 - c2;
                    669:                iter1 = bws_iterator_inc(iter1, 1);
                    670:                iter2 = bws_iterator_inc(iter2, 1);
                    671:        }
                    672:
                    673:        return 0;
                    674: }
                    675:
                    676: int
                    677: bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
                    678: {
                    679:        size_t len1, len2;
                    680:
                    681:        len1 = bws1->len;
                    682:        len2 = bws2->len;
                    683:
                    684:        if (len1 <= offset)
                    685:                return (len2 <= offset) ? 0 : -1;
1.8       schwarze  686:
1.9     ! schwarze  687:        if (len2 <= offset)
        !           688:                return 1;
1.8       schwarze  689:
1.9     ! schwarze  690:        len1 -= offset;
        !           691:        len2 -= offset;
1.1       millert   692:
1.9     ! schwarze  693:        if (sort_mb_cur_max == 1) {
        !           694:                const unsigned char *s1, *s2;
        !           695:                int res;
1.1       millert   696:
1.9     ! schwarze  697:                s1 = bws1->data.cstr + offset;
        !           698:                s2 = bws2->data.cstr + offset;
1.1       millert   699:
1.9     ! schwarze  700:                if (len1 > len2) {
        !           701:                        res = memcmp(s1, s2, len2);
        !           702:                        if (!res)
        !           703:                                res = +1;
        !           704:                } else if (len1 < len2) {
        !           705:                        res = memcmp(s1, s2, len1);
        !           706:                        if (!res)
        !           707:                                res = -1;
        !           708:                } else
        !           709:                        res = memcmp(s1, s2, len1);
1.1       millert   710:
1.9     ! schwarze  711:                return res;
        !           712:        } else {
        !           713:                const wchar_t *s1, *s2;
        !           714:                size_t i, maxlen;
        !           715:                int res = 0;
        !           716:
        !           717:                s1 = bws1->data.wstr + offset;
        !           718:                s2 = bws2->data.wstr + offset;
        !           719:
        !           720:                i = 0;
        !           721:                maxlen = len1;
        !           722:
        !           723:                if (maxlen > len2)
        !           724:                        maxlen = len2;
        !           725:
        !           726:                while (i < maxlen) {
        !           727:
        !           728:                        /* goto next non-zero part: */
        !           729:                        while (i < maxlen &&
        !           730:                            s1[i] == L'\0' && s2[i] == L'\0')
        !           731:                                ++i;
        !           732:
        !           733:                        if (i >= maxlen)
        !           734:                                break;
        !           735:
        !           736:                        if (s1[i] == L'\0') {
        !           737:                                if (s2[i] == L'\0')
        !           738:                                        /* NOTREACHED */
        !           739:                                        err(2, "bwscoll error 1");
        !           740:                                else
        !           741:                                        return -1;
        !           742:                        } else if (s2[i] == L'\0')
        !           743:                                return 1;
        !           744:
        !           745:                        res = wide_str_coll(s1 + i, s2 + i);
        !           746:                        if (res)
        !           747:                                return res;
        !           748:
        !           749:                        while (i < maxlen && s1[i] != L'\0' && s2[i] != L'\0')
        !           750:                                ++i;
        !           751:
        !           752:                        if (i >= maxlen)
        !           753:                                break;
        !           754:
        !           755:                        if (s1[i] == L'\0') {
        !           756:                                if (s2[i] == L'\0') {
        !           757:                                        ++i;
        !           758:                                        continue;
        !           759:                                } else
        !           760:                                        return -1;
        !           761:                        } else if (s2[i] == L'\0')
        !           762:                                return 1;
        !           763:                        else
        !           764:                                /* NOTREACHED */
        !           765:                                err(2, "bwscoll error 2");
        !           766:                }
1.1       millert   767:
1.9     ! schwarze  768:                if (len1 == len2)
        !           769:                        return 0;
        !           770:                return len1 < len2 ? -1 : 1;
        !           771:        }
1.1       millert   772: }
                    773:
                    774: /*
                    775:  * Correction of the system API
                    776:  */
                    777: double
                    778: bwstod(struct bwstring *s0, bool *empty)
                    779: {
                    780:        double ret = 0;
                    781:
                    782:        if (sort_mb_cur_max == 1) {
                    783:                char *ep, *end, *s;
                    784:
                    785:                s = (char *)s0->data.cstr;
                    786:                end = s + s0->len;
                    787:                ep = NULL;
                    788:
                    789:                while (isblank((unsigned char)*s) && s < end)
                    790:                        ++s;
                    791:
                    792:                if (!isprint((unsigned char)*s)) {
                    793:                        *empty = true;
                    794:                        return 0;
                    795:                }
                    796:
                    797:                ret = strtod(s, &ep);
                    798:                if (ep == s) {
                    799:                        *empty = true;
                    800:                        return 0;
                    801:                }
                    802:        } else {
                    803:                wchar_t *end, *ep, *s;
                    804:
                    805:                s = s0->data.wstr;
                    806:                end = s + s0->len;
                    807:                ep = NULL;
                    808:
                    809:                while (iswblank(*s) && s < end)
                    810:                        ++s;
                    811:
                    812:                if (!iswprint(*s)) {
                    813:                        *empty = true;
                    814:                        return 0;
                    815:                }
                    816:
                    817:                ret = wcstod(s, &ep);
                    818:                if (ep == s) {
                    819:                        *empty = true;
                    820:                        return 0;
                    821:                }
                    822:        }
                    823:
                    824:        *empty = false;
                    825:        return ret;
                    826: }
                    827:
                    828: /*
                    829:  * A helper function for monthcoll.  If a line matches
                    830:  * a month name, it returns (number of the month - 1),
                    831:  * while if there is no match, it just return -1.
                    832:  */
                    833: int
                    834: bws_month_score(const struct bwstring *s0)
                    835: {
                    836:        if (sort_mb_cur_max == 1) {
                    837:                const char *end, *s;
                    838:                int i;
                    839:
                    840:                s = (char *)s0->data.cstr;
                    841:                end = s + s0->len;
                    842:
                    843:                while (isblank((unsigned char)*s) && s < end)
                    844:                        ++s;
                    845:
                    846:                for (i = 11; i >= 0; --i) {
                    847:                        if (cmonths[i] &&
                    848:                            (s == strstr(s, cmonths[i])))
                    849:                                return i;
                    850:                }
                    851:        } else {
                    852:                const wchar_t *end, *s;
                    853:                int i;
                    854:
                    855:                s = s0->data.wstr;
                    856:                end = s + s0->len;
                    857:
                    858:                while (iswblank(*s) && s < end)
                    859:                        ++s;
                    860:
                    861:                for (i = 11; i >= 0; --i) {
                    862:                        if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
                    863:                                return i;
                    864:                }
                    865:        }
                    866:
                    867:        return -1;
                    868: }
                    869:
                    870: /*
                    871:  * Rips out leading blanks (-b).
                    872:  */
                    873: struct bwstring *
                    874: ignore_leading_blanks(struct bwstring *str)
                    875: {
                    876:        if (sort_mb_cur_max == 1) {
                    877:                unsigned char *dst, *end, *src;
                    878:
                    879:                src = str->data.cstr;
                    880:                dst = src;
                    881:                end = src + str->len;
                    882:
                    883:                while (src < end && isblank(*src))
                    884:                        ++src;
                    885:
                    886:                if (src != dst) {
                    887:                        size_t newlen;
                    888:
                    889:                        newlen = BWSLEN(str) - (src - dst);
                    890:
                    891:                        while (src < end) {
                    892:                                *dst = *src;
                    893:                                ++dst;
                    894:                                ++src;
                    895:                        }
                    896:                        bws_setlen(str, newlen);
                    897:                }
                    898:        } else {
                    899:                wchar_t *dst, *end, *src;
                    900:
                    901:                src = str->data.wstr;
                    902:                dst = src;
                    903:                end = src + str->len;
                    904:
                    905:                while (src < end && iswblank(*src))
                    906:                        ++src;
                    907:
                    908:                if (src != dst) {
                    909:
                    910:                        size_t newlen = BWSLEN(str) - (src - dst);
                    911:
                    912:                        while (src < end) {
                    913:                                *dst = *src;
                    914:                                ++dst;
                    915:                                ++src;
                    916:                        }
                    917:                        bws_setlen(str, newlen);
                    918:
                    919:                }
                    920:        }
                    921:        return str;
                    922: }
                    923:
                    924: /*
                    925:  * Rips out nonprinting characters (-i).
                    926:  */
                    927: struct bwstring *
                    928: ignore_nonprinting(struct bwstring *str)
                    929: {
                    930:        size_t newlen = str->len;
                    931:
                    932:        if (sort_mb_cur_max == 1) {
                    933:                unsigned char *dst, *end, *src;
                    934:                unsigned char c;
                    935:
                    936:                src = str->data.cstr;
                    937:                dst = src;
                    938:                end = src + str->len;
                    939:
                    940:                while (src < end) {
                    941:                        c = *src;
                    942:                        if (isprint(c)) {
                    943:                                *dst = c;
                    944:                                ++dst;
                    945:                                ++src;
                    946:                        } else {
                    947:                                ++src;
                    948:                                --newlen;
                    949:                        }
                    950:                }
                    951:        } else {
                    952:                wchar_t *dst, *end, *src;
                    953:                wchar_t c;
                    954:
                    955:                src = str->data.wstr;
                    956:                dst = src;
                    957:                end = src + str->len;
                    958:
                    959:                while (src < end) {
                    960:                        c = *src;
                    961:                        if (iswprint(c)) {
                    962:                                *dst = c;
                    963:                                ++dst;
                    964:                                ++src;
                    965:                        } else {
                    966:                                ++src;
                    967:                                --newlen;
                    968:                        }
                    969:                }
                    970:        }
                    971:        bws_setlen(str, newlen);
                    972:
                    973:        return str;
                    974: }
                    975:
                    976: /*
                    977:  * Rips out any characters that are not alphanumeric characters
                    978:  * nor blanks (-d).
                    979:  */
                    980: struct bwstring *
                    981: dictionary_order(struct bwstring *str)
                    982: {
                    983:        size_t newlen = str->len;
                    984:
                    985:        if (sort_mb_cur_max == 1) {
                    986:                unsigned char *dst, *end, *src;
                    987:                unsigned char c;
                    988:
                    989:                src = str->data.cstr;
                    990:                dst = src;
                    991:                end = src + str->len;
                    992:
                    993:                while (src < end) {
                    994:                        c = *src;
                    995:                        if (isalnum(c) || isblank(c)) {
                    996:                                *dst = c;
                    997:                                ++dst;
                    998:                                ++src;
                    999:                        } else {
                   1000:                                ++src;
                   1001:                                --newlen;
                   1002:                        }
                   1003:                }
                   1004:        } else {
                   1005:                wchar_t *dst, *end, *src;
                   1006:                wchar_t c;
                   1007:
                   1008:                src = str->data.wstr;
                   1009:                dst = src;
                   1010:                end = src + str->len;
                   1011:
                   1012:                while (src < end) {
                   1013:                        c = *src;
                   1014:                        if (iswalnum(c) || iswblank(c)) {
                   1015:                                *dst = c;
                   1016:                                ++dst;
                   1017:                                ++src;
                   1018:                        } else {
                   1019:                                ++src;
                   1020:                                --newlen;
                   1021:                        }
                   1022:                }
                   1023:        }
                   1024:        bws_setlen(str, newlen);
                   1025:
                   1026:        return str;
                   1027: }
                   1028:
                   1029: /*
                   1030:  * Converts string to lower case(-f).
                   1031:  */
                   1032: struct bwstring *
                   1033: ignore_case(struct bwstring *str)
                   1034: {
                   1035:        if (sort_mb_cur_max == 1) {
                   1036:                unsigned char *end, *s;
                   1037:
                   1038:                s = str->data.cstr;
                   1039:                end = s + str->len;
                   1040:
                   1041:                while (s < end) {
                   1042:                        *s = toupper(*s);
                   1043:                        ++s;
                   1044:                }
                   1045:        } else {
                   1046:                wchar_t *end, *s;
                   1047:
                   1048:                s = str->data.wstr;
                   1049:                end = s + str->len;
                   1050:
                   1051:                while (s < end) {
                   1052:                        *s = towupper(*s);
                   1053:                        ++s;
                   1054:                }
                   1055:        }
                   1056:        return str;
                   1057: }
                   1058:
                   1059: void
                   1060: bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
                   1061: {
                   1062:        if (sort_mb_cur_max == 1)
                   1063:                warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
                   1064:        else
                   1065:                warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
                   1066: }