[BACK]Return to vsort.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / sort

Annotation of src/usr.bin/sort/vsort.c, Revision 1.1

1.1     ! millert     1: /*     $OpenBSD$       */
        !             2:
        !             3: /*-
        !             4:  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
        !             5:  * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
        !             6:  * All rights reserved.
        !             7:  *
        !             8:  * Redistribution and use in source and binary forms, with or without
        !             9:  * modification, are permitted provided that the following conditions
        !            10:  * are met:
        !            11:  * 1. Redistributions of source code must retain the above copyright
        !            12:  *    notice, this list of conditions and the following disclaimer.
        !            13:  * 2. Redistributions in binary form must reproduce the above copyright
        !            14:  *    notice, this list of conditions and the following disclaimer in the
        !            15:  *    documentation and/or other materials provided with the distribution.
        !            16:  *
        !            17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
        !            18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
        !            21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            27:  * SUCH DAMAGE.
        !            28:  */
        !            29:
        !            30: #include <sys/types.h>
        !            31:
        !            32: #include <ctype.h>
        !            33: #include <stdlib.h>
        !            34: #include <string.h>
        !            35:
        !            36: #include "sort.h"
        !            37: #include "vsort.h"
        !            38:
        !            39: static inline bool
        !            40: isdigit_clocale(wchar_t c)
        !            41: {
        !            42:
        !            43:        return (c >= L'0' && c <= L'9');
        !            44: }
        !            45:
        !            46: static inline bool
        !            47: isalpha_clocale(wchar_t c)
        !            48: {
        !            49:
        !            50:        return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
        !            51: }
        !            52:
        !            53: static inline bool
        !            54: isalnum_clocale(wchar_t c)
        !            55: {
        !            56:
        !            57:        return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
        !            58:            (c >= L'0' && c <= L'9'));
        !            59: }
        !            60:
        !            61: /*
        !            62:  * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
        !            63:  * Set length of string before suffix.
        !            64:  */
        !            65: static void
        !            66: find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
        !            67: {
        !            68:        wchar_t c;
        !            69:        size_t clen;
        !            70:        bool expect_alpha, sfx;
        !            71:
        !            72:        sfx = false;
        !            73:        expect_alpha = false;
        !            74:        *len = 0;
        !            75:        clen = 0;
        !            76:
        !            77:        while ((si < se) && (c = bws_get_iter_value(si))) {
        !            78:                if (expect_alpha) {
        !            79:                        expect_alpha = false;
        !            80:                        if (!isalpha_clocale(c) && (c != L'~'))
        !            81:                                sfx = false;
        !            82:                } else if (c == L'.') {
        !            83:                        expect_alpha = true;
        !            84:                        if (!sfx) {
        !            85:                                sfx = true;
        !            86:                                *len = clen;
        !            87:                        }
        !            88:                } else if (!isalnum_clocale(c) && (c != L'~'))
        !            89:                        sfx = false;
        !            90:
        !            91:                si = bws_iterator_inc(si, 1);
        !            92:                ++clen;
        !            93:        }
        !            94:
        !            95:        /* This code must be here to make the implementation compatible
        !            96:         * with WORDING of GNU sort documentation.
        !            97:         * But the GNU sort implementation is not following its own
        !            98:         * documentation.  GNU sort allows empty file extensions
        !            99:         * (just dot with nothing after); but the regular expression in
        !           100:         * their documentation does not allow empty file extensions.
        !           101:         * We chose to make our implementation compatible with GNU sort
        !           102:         * implementation.  If they will ever fix their bug, this code
        !           103:         * must be uncommented. Or they may choose to fix the info page,
        !           104:         * then the code stays commented.
        !           105:         *
        !           106:         if (expect_alpha)
        !           107:                sfx = false;
        !           108:         */
        !           109:
        !           110:        if (!sfx)
        !           111:                *len = clen;
        !           112: }
        !           113:
        !           114: static inline int
        !           115: cmp_chars(wchar_t c1, wchar_t c2)
        !           116: {
        !           117:
        !           118:        if (c1 == c2)
        !           119:                return 0;
        !           120:
        !           121:        if (c1 == L'~')
        !           122:                return -1;
        !           123:        if (c2 == L'~')
        !           124:                return 1;
        !           125:
        !           126:        if (isdigit_clocale(c1) || !c1)
        !           127:                return (isdigit_clocale(c2) || !c2) ? 0 : -1;
        !           128:
        !           129:        if (isdigit_clocale(c2) || !c2)
        !           130:                return 1;
        !           131:
        !           132:        if (isalpha_clocale(c1))
        !           133:                return isalpha_clocale(c2) ? (int)c1 - (int)c2 : -1;
        !           134:
        !           135:        if (isalpha_clocale(c2))
        !           136:                return 1;
        !           137:
        !           138:        return (int)c1 - (int)c2;
        !           139: }
        !           140:
        !           141: static int
        !           142: cmpversions(bwstring_iterator si1, bwstring_iterator se1,
        !           143:     bwstring_iterator si2, bwstring_iterator se2)
        !           144: {
        !           145:        int cmp, diff;
        !           146:
        !           147:        while ((si1 < se1) || (si2 < se2)) {
        !           148:                diff = 0;
        !           149:
        !           150:                while (((si1 < se1) &&
        !           151:                    !isdigit_clocale(bws_get_iter_value(si1))) ||
        !           152:                    ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
        !           153:                        wchar_t c1, c2;
        !           154:
        !           155:                        c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
        !           156:                        c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
        !           157:
        !           158:                        cmp = cmp_chars(c1, c2);
        !           159:                        if (cmp)
        !           160:                                return cmp;
        !           161:
        !           162:                        if (si1 < se1)
        !           163:                                si1 = bws_iterator_inc(si1, 1);
        !           164:                        if (si2 < se2)
        !           165:                                si2 = bws_iterator_inc(si2, 1);
        !           166:                }
        !           167:
        !           168:                while (bws_get_iter_value(si1) == L'0')
        !           169:                        si1 = bws_iterator_inc(si1, 1);
        !           170:
        !           171:                while (bws_get_iter_value(si2) == L'0')
        !           172:                        si2 = bws_iterator_inc(si2, 1);
        !           173:
        !           174:                while (isdigit_clocale(bws_get_iter_value(si1)) &&
        !           175:                    isdigit_clocale(bws_get_iter_value(si2))) {
        !           176:                        if (!diff)
        !           177:                                diff = ((int)bws_get_iter_value(si1) -
        !           178:                                    (int)bws_get_iter_value(si2));
        !           179:                        si1 = bws_iterator_inc(si1, 1);
        !           180:                        si2 = bws_iterator_inc(si2, 1);
        !           181:                }
        !           182:
        !           183:                if (isdigit_clocale(bws_get_iter_value(si1)))
        !           184:                        return 1;
        !           185:
        !           186:                if (isdigit_clocale(bws_get_iter_value(si2)))
        !           187:                        return -1;
        !           188:
        !           189:                if (diff)
        !           190:                        return diff;
        !           191:        }
        !           192:
        !           193:        return 0;
        !           194: }
        !           195:
        !           196: /*
        !           197:  * Compare two version strings
        !           198:  */
        !           199: int
        !           200: vcmp(struct bwstring *s1, struct bwstring *s2)
        !           201: {
        !           202:        bwstring_iterator si1, si2;
        !           203:        wchar_t c1, c2;
        !           204:        size_t len1, len2, slen1, slen2;
        !           205:        int cmp_bytes, cmp_res;
        !           206:
        !           207:        if (s1 == s2)
        !           208:                return 0;
        !           209:
        !           210:        cmp_bytes = bwscmp(s1, s2, 0);
        !           211:        if (cmp_bytes == 0)
        !           212:                return 0;
        !           213:
        !           214:        len1 = slen1 = BWSLEN(s1);
        !           215:        len2 = slen2 = BWSLEN(s2);
        !           216:
        !           217:        if (slen1 < 1)
        !           218:                return -1;
        !           219:        if (slen2 < 1)
        !           220:                return 1;
        !           221:
        !           222:        si1 = bws_begin(s1);
        !           223:        si2 = bws_begin(s2);
        !           224:
        !           225:        c1 = bws_get_iter_value(si1);
        !           226:        c2 = bws_get_iter_value(si2);
        !           227:
        !           228:        if (c1 == L'.' && (slen1 == 1))
        !           229:                return -1;
        !           230:
        !           231:        if (c2 == L'.' && (slen2 == 1))
        !           232:                return 1;
        !           233:
        !           234:        if (slen1 == 2 && c1 == L'.' &&
        !           235:            bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
        !           236:                return -1;
        !           237:        if (slen2 == 2 && c2 == L'.' &&
        !           238:            bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
        !           239:                return 1;
        !           240:
        !           241:        if (c1 == L'.' && c2 != L'.')
        !           242:                return -1;
        !           243:        if (c1 != L'.' && c2 == L'.')
        !           244:                return 1;
        !           245:
        !           246:        if (c1 == L'.' && c2 == L'.') {
        !           247:                si1 = bws_iterator_inc(si1, 1);
        !           248:                si2 = bws_iterator_inc(si2, 1);
        !           249:        }
        !           250:
        !           251:        find_suffix(si1, bws_end(s1), &len1);
        !           252:        find_suffix(si2, bws_end(s2), &len2);
        !           253:
        !           254:        if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
        !           255:                return cmp_bytes;
        !           256:
        !           257:        cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
        !           258:            bws_iterator_inc(si2, len2));
        !           259:
        !           260:        if (cmp_res == 0)
        !           261:          cmp_res = cmp_bytes;
        !           262:
        !           263:        return cmp_res;
        !           264: }