[BACK]Return to vsort.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / sort

Annotation of src/usr.bin/sort/vsort.c, Revision 1.2

1.2     ! millert     1: /*     $OpenBSD: vsort.c,v 1.1 2015/03/17 17:45:13 millert Exp $       */
1.1       millert     2:
                      3: /*-
                      4:  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
                      5:  * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29:
                     30: #include <sys/types.h>
                     31:
                     32: #include <ctype.h>
                     33: #include <stdlib.h>
                     34: #include <string.h>
                     35:
                     36: #include "sort.h"
                     37: #include "vsort.h"
                     38:
                     39: static inline bool
                     40: isdigit_clocale(wchar_t c)
                     41: {
                     42:        return (c >= L'0' && c <= L'9');
                     43: }
                     44:
                     45: static inline bool
                     46: isalpha_clocale(wchar_t c)
                     47: {
                     48:        return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
                     49: }
                     50:
                     51: static inline bool
                     52: isalnum_clocale(wchar_t c)
                     53: {
                     54:        return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
                     55:            (c >= L'0' && c <= L'9'));
                     56: }
                     57:
                     58: /*
                     59:  * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
                     60:  * Set length of string before suffix.
                     61:  */
                     62: static void
                     63: find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
                     64: {
                     65:        wchar_t c;
                     66:        size_t clen;
                     67:        bool expect_alpha, sfx;
                     68:
                     69:        sfx = false;
                     70:        expect_alpha = false;
                     71:        *len = 0;
                     72:        clen = 0;
                     73:
                     74:        while ((si < se) && (c = bws_get_iter_value(si))) {
                     75:                if (expect_alpha) {
                     76:                        expect_alpha = false;
                     77:                        if (!isalpha_clocale(c) && (c != L'~'))
                     78:                                sfx = false;
                     79:                } else if (c == L'.') {
                     80:                        expect_alpha = true;
                     81:                        if (!sfx) {
                     82:                                sfx = true;
                     83:                                *len = clen;
                     84:                        }
                     85:                } else if (!isalnum_clocale(c) && (c != L'~'))
                     86:                        sfx = false;
                     87:
                     88:                si = bws_iterator_inc(si, 1);
                     89:                ++clen;
                     90:        }
                     91:
                     92:        /* This code must be here to make the implementation compatible
                     93:         * with WORDING of GNU sort documentation.
                     94:         * But the GNU sort implementation is not following its own
                     95:         * documentation.  GNU sort allows empty file extensions
                     96:         * (just dot with nothing after); but the regular expression in
                     97:         * their documentation does not allow empty file extensions.
                     98:         * We chose to make our implementation compatible with GNU sort
                     99:         * implementation.  If they will ever fix their bug, this code
                    100:         * must be uncommented. Or they may choose to fix the info page,
                    101:         * then the code stays commented.
                    102:         *
                    103:         if (expect_alpha)
                    104:                sfx = false;
                    105:         */
                    106:
                    107:        if (!sfx)
                    108:                *len = clen;
                    109: }
                    110:
                    111: static inline int
                    112: cmp_chars(wchar_t c1, wchar_t c2)
                    113: {
                    114:        if (c1 == c2)
                    115:                return 0;
                    116:
                    117:        if (c1 == L'~')
                    118:                return -1;
                    119:        if (c2 == L'~')
                    120:                return 1;
                    121:
                    122:        if (isdigit_clocale(c1) || !c1)
                    123:                return (isdigit_clocale(c2) || !c2) ? 0 : -1;
                    124:
                    125:        if (isdigit_clocale(c2) || !c2)
                    126:                return 1;
                    127:
                    128:        if (isalpha_clocale(c1))
                    129:                return isalpha_clocale(c2) ? (int)c1 - (int)c2 : -1;
                    130:
                    131:        if (isalpha_clocale(c2))
                    132:                return 1;
                    133:
                    134:        return (int)c1 - (int)c2;
                    135: }
                    136:
                    137: static int
                    138: cmpversions(bwstring_iterator si1, bwstring_iterator se1,
                    139:     bwstring_iterator si2, bwstring_iterator se2)
                    140: {
                    141:        int cmp, diff;
                    142:
                    143:        while ((si1 < se1) || (si2 < se2)) {
                    144:                diff = 0;
                    145:
                    146:                while (((si1 < se1) &&
                    147:                    !isdigit_clocale(bws_get_iter_value(si1))) ||
                    148:                    ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
                    149:                        wchar_t c1, c2;
                    150:
                    151:                        c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
                    152:                        c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
                    153:
                    154:                        cmp = cmp_chars(c1, c2);
                    155:                        if (cmp)
                    156:                                return cmp;
                    157:
                    158:                        if (si1 < se1)
                    159:                                si1 = bws_iterator_inc(si1, 1);
                    160:                        if (si2 < se2)
                    161:                                si2 = bws_iterator_inc(si2, 1);
                    162:                }
                    163:
                    164:                while (bws_get_iter_value(si1) == L'0')
                    165:                        si1 = bws_iterator_inc(si1, 1);
                    166:
                    167:                while (bws_get_iter_value(si2) == L'0')
                    168:                        si2 = bws_iterator_inc(si2, 1);
                    169:
                    170:                while (isdigit_clocale(bws_get_iter_value(si1)) &&
                    171:                    isdigit_clocale(bws_get_iter_value(si2))) {
                    172:                        if (!diff)
                    173:                                diff = ((int)bws_get_iter_value(si1) -
                    174:                                    (int)bws_get_iter_value(si2));
                    175:                        si1 = bws_iterator_inc(si1, 1);
                    176:                        si2 = bws_iterator_inc(si2, 1);
                    177:                }
                    178:
                    179:                if (isdigit_clocale(bws_get_iter_value(si1)))
                    180:                        return 1;
                    181:
                    182:                if (isdigit_clocale(bws_get_iter_value(si2)))
                    183:                        return -1;
                    184:
                    185:                if (diff)
                    186:                        return diff;
                    187:        }
                    188:
                    189:        return 0;
                    190: }
                    191:
                    192: /*
                    193:  * Compare two version strings
                    194:  */
                    195: int
                    196: vcmp(struct bwstring *s1, struct bwstring *s2)
                    197: {
                    198:        bwstring_iterator si1, si2;
                    199:        wchar_t c1, c2;
                    200:        size_t len1, len2, slen1, slen2;
                    201:        int cmp_bytes, cmp_res;
                    202:
                    203:        if (s1 == s2)
                    204:                return 0;
                    205:
                    206:        cmp_bytes = bwscmp(s1, s2, 0);
                    207:        if (cmp_bytes == 0)
                    208:                return 0;
                    209:
                    210:        len1 = slen1 = BWSLEN(s1);
                    211:        len2 = slen2 = BWSLEN(s2);
                    212:
                    213:        if (slen1 < 1)
                    214:                return -1;
                    215:        if (slen2 < 1)
                    216:                return 1;
                    217:
                    218:        si1 = bws_begin(s1);
                    219:        si2 = bws_begin(s2);
                    220:
                    221:        c1 = bws_get_iter_value(si1);
                    222:        c2 = bws_get_iter_value(si2);
                    223:
                    224:        if (c1 == L'.' && (slen1 == 1))
                    225:                return -1;
                    226:
                    227:        if (c2 == L'.' && (slen2 == 1))
                    228:                return 1;
                    229:
                    230:        if (slen1 == 2 && c1 == L'.' &&
                    231:            bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
                    232:                return -1;
                    233:        if (slen2 == 2 && c2 == L'.' &&
                    234:            bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
                    235:                return 1;
                    236:
                    237:        if (c1 == L'.' && c2 != L'.')
                    238:                return -1;
                    239:        if (c1 != L'.' && c2 == L'.')
                    240:                return 1;
                    241:
                    242:        if (c1 == L'.' && c2 == L'.') {
                    243:                si1 = bws_iterator_inc(si1, 1);
                    244:                si2 = bws_iterator_inc(si2, 1);
                    245:        }
                    246:
                    247:        find_suffix(si1, bws_end(s1), &len1);
                    248:        find_suffix(si2, bws_end(s2), &len2);
                    249:
                    250:        if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
                    251:                return cmp_bytes;
                    252:
                    253:        cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
                    254:            bws_iterator_inc(si2, len2));
                    255:
                    256:        if (cmp_res == 0)
                    257:          cmp_res = cmp_bytes;
                    258:
                    259:        return cmp_res;
                    260: }