Annotation of src/usr.bin/sort/vsort.c, Revision 1.1
1.1 ! millert 1: /* $OpenBSD$ */
! 2:
! 3: /*-
! 4: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
! 5: * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
! 6: * All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: *
! 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
! 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
! 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 27: * SUCH DAMAGE.
! 28: */
! 29:
! 30: #include <sys/types.h>
! 31:
! 32: #include <ctype.h>
! 33: #include <stdlib.h>
! 34: #include <string.h>
! 35:
! 36: #include "sort.h"
! 37: #include "vsort.h"
! 38:
! 39: static inline bool
! 40: isdigit_clocale(wchar_t c)
! 41: {
! 42:
! 43: return (c >= L'0' && c <= L'9');
! 44: }
! 45:
! 46: static inline bool
! 47: isalpha_clocale(wchar_t c)
! 48: {
! 49:
! 50: return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
! 51: }
! 52:
! 53: static inline bool
! 54: isalnum_clocale(wchar_t c)
! 55: {
! 56:
! 57: return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
! 58: (c >= L'0' && c <= L'9'));
! 59: }
! 60:
! 61: /*
! 62: * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
! 63: * Set length of string before suffix.
! 64: */
! 65: static void
! 66: find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
! 67: {
! 68: wchar_t c;
! 69: size_t clen;
! 70: bool expect_alpha, sfx;
! 71:
! 72: sfx = false;
! 73: expect_alpha = false;
! 74: *len = 0;
! 75: clen = 0;
! 76:
! 77: while ((si < se) && (c = bws_get_iter_value(si))) {
! 78: if (expect_alpha) {
! 79: expect_alpha = false;
! 80: if (!isalpha_clocale(c) && (c != L'~'))
! 81: sfx = false;
! 82: } else if (c == L'.') {
! 83: expect_alpha = true;
! 84: if (!sfx) {
! 85: sfx = true;
! 86: *len = clen;
! 87: }
! 88: } else if (!isalnum_clocale(c) && (c != L'~'))
! 89: sfx = false;
! 90:
! 91: si = bws_iterator_inc(si, 1);
! 92: ++clen;
! 93: }
! 94:
! 95: /* This code must be here to make the implementation compatible
! 96: * with WORDING of GNU sort documentation.
! 97: * But the GNU sort implementation is not following its own
! 98: * documentation. GNU sort allows empty file extensions
! 99: * (just dot with nothing after); but the regular expression in
! 100: * their documentation does not allow empty file extensions.
! 101: * We chose to make our implementation compatible with GNU sort
! 102: * implementation. If they will ever fix their bug, this code
! 103: * must be uncommented. Or they may choose to fix the info page,
! 104: * then the code stays commented.
! 105: *
! 106: if (expect_alpha)
! 107: sfx = false;
! 108: */
! 109:
! 110: if (!sfx)
! 111: *len = clen;
! 112: }
! 113:
! 114: static inline int
! 115: cmp_chars(wchar_t c1, wchar_t c2)
! 116: {
! 117:
! 118: if (c1 == c2)
! 119: return 0;
! 120:
! 121: if (c1 == L'~')
! 122: return -1;
! 123: if (c2 == L'~')
! 124: return 1;
! 125:
! 126: if (isdigit_clocale(c1) || !c1)
! 127: return (isdigit_clocale(c2) || !c2) ? 0 : -1;
! 128:
! 129: if (isdigit_clocale(c2) || !c2)
! 130: return 1;
! 131:
! 132: if (isalpha_clocale(c1))
! 133: return isalpha_clocale(c2) ? (int)c1 - (int)c2 : -1;
! 134:
! 135: if (isalpha_clocale(c2))
! 136: return 1;
! 137:
! 138: return (int)c1 - (int)c2;
! 139: }
! 140:
! 141: static int
! 142: cmpversions(bwstring_iterator si1, bwstring_iterator se1,
! 143: bwstring_iterator si2, bwstring_iterator se2)
! 144: {
! 145: int cmp, diff;
! 146:
! 147: while ((si1 < se1) || (si2 < se2)) {
! 148: diff = 0;
! 149:
! 150: while (((si1 < se1) &&
! 151: !isdigit_clocale(bws_get_iter_value(si1))) ||
! 152: ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
! 153: wchar_t c1, c2;
! 154:
! 155: c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
! 156: c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
! 157:
! 158: cmp = cmp_chars(c1, c2);
! 159: if (cmp)
! 160: return cmp;
! 161:
! 162: if (si1 < se1)
! 163: si1 = bws_iterator_inc(si1, 1);
! 164: if (si2 < se2)
! 165: si2 = bws_iterator_inc(si2, 1);
! 166: }
! 167:
! 168: while (bws_get_iter_value(si1) == L'0')
! 169: si1 = bws_iterator_inc(si1, 1);
! 170:
! 171: while (bws_get_iter_value(si2) == L'0')
! 172: si2 = bws_iterator_inc(si2, 1);
! 173:
! 174: while (isdigit_clocale(bws_get_iter_value(si1)) &&
! 175: isdigit_clocale(bws_get_iter_value(si2))) {
! 176: if (!diff)
! 177: diff = ((int)bws_get_iter_value(si1) -
! 178: (int)bws_get_iter_value(si2));
! 179: si1 = bws_iterator_inc(si1, 1);
! 180: si2 = bws_iterator_inc(si2, 1);
! 181: }
! 182:
! 183: if (isdigit_clocale(bws_get_iter_value(si1)))
! 184: return 1;
! 185:
! 186: if (isdigit_clocale(bws_get_iter_value(si2)))
! 187: return -1;
! 188:
! 189: if (diff)
! 190: return diff;
! 191: }
! 192:
! 193: return 0;
! 194: }
! 195:
! 196: /*
! 197: * Compare two version strings
! 198: */
! 199: int
! 200: vcmp(struct bwstring *s1, struct bwstring *s2)
! 201: {
! 202: bwstring_iterator si1, si2;
! 203: wchar_t c1, c2;
! 204: size_t len1, len2, slen1, slen2;
! 205: int cmp_bytes, cmp_res;
! 206:
! 207: if (s1 == s2)
! 208: return 0;
! 209:
! 210: cmp_bytes = bwscmp(s1, s2, 0);
! 211: if (cmp_bytes == 0)
! 212: return 0;
! 213:
! 214: len1 = slen1 = BWSLEN(s1);
! 215: len2 = slen2 = BWSLEN(s2);
! 216:
! 217: if (slen1 < 1)
! 218: return -1;
! 219: if (slen2 < 1)
! 220: return 1;
! 221:
! 222: si1 = bws_begin(s1);
! 223: si2 = bws_begin(s2);
! 224:
! 225: c1 = bws_get_iter_value(si1);
! 226: c2 = bws_get_iter_value(si2);
! 227:
! 228: if (c1 == L'.' && (slen1 == 1))
! 229: return -1;
! 230:
! 231: if (c2 == L'.' && (slen2 == 1))
! 232: return 1;
! 233:
! 234: if (slen1 == 2 && c1 == L'.' &&
! 235: bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
! 236: return -1;
! 237: if (slen2 == 2 && c2 == L'.' &&
! 238: bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
! 239: return 1;
! 240:
! 241: if (c1 == L'.' && c2 != L'.')
! 242: return -1;
! 243: if (c1 != L'.' && c2 == L'.')
! 244: return 1;
! 245:
! 246: if (c1 == L'.' && c2 == L'.') {
! 247: si1 = bws_iterator_inc(si1, 1);
! 248: si2 = bws_iterator_inc(si2, 1);
! 249: }
! 250:
! 251: find_suffix(si1, bws_end(s1), &len1);
! 252: find_suffix(si2, bws_end(s2), &len2);
! 253:
! 254: if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
! 255: return cmp_bytes;
! 256:
! 257: cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
! 258: bws_iterator_inc(si2, len2));
! 259:
! 260: if (cmp_res == 0)
! 261: cmp_res = cmp_bytes;
! 262:
! 263: return cmp_res;
! 264: }