Annotation of src/usr.bin/sort/vsort.c, Revision 1.2
1.2 ! millert 1: /* $OpenBSD: vsort.c,v 1.1 2015/03/17 17:45:13 millert Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
5: * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
30: #include <sys/types.h>
31:
32: #include <ctype.h>
33: #include <stdlib.h>
34: #include <string.h>
35:
36: #include "sort.h"
37: #include "vsort.h"
38:
39: static inline bool
40: isdigit_clocale(wchar_t c)
41: {
42: return (c >= L'0' && c <= L'9');
43: }
44:
45: static inline bool
46: isalpha_clocale(wchar_t c)
47: {
48: return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
49: }
50:
51: static inline bool
52: isalnum_clocale(wchar_t c)
53: {
54: return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
55: (c >= L'0' && c <= L'9'));
56: }
57:
58: /*
59: * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
60: * Set length of string before suffix.
61: */
62: static void
63: find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
64: {
65: wchar_t c;
66: size_t clen;
67: bool expect_alpha, sfx;
68:
69: sfx = false;
70: expect_alpha = false;
71: *len = 0;
72: clen = 0;
73:
74: while ((si < se) && (c = bws_get_iter_value(si))) {
75: if (expect_alpha) {
76: expect_alpha = false;
77: if (!isalpha_clocale(c) && (c != L'~'))
78: sfx = false;
79: } else if (c == L'.') {
80: expect_alpha = true;
81: if (!sfx) {
82: sfx = true;
83: *len = clen;
84: }
85: } else if (!isalnum_clocale(c) && (c != L'~'))
86: sfx = false;
87:
88: si = bws_iterator_inc(si, 1);
89: ++clen;
90: }
91:
92: /* This code must be here to make the implementation compatible
93: * with WORDING of GNU sort documentation.
94: * But the GNU sort implementation is not following its own
95: * documentation. GNU sort allows empty file extensions
96: * (just dot with nothing after); but the regular expression in
97: * their documentation does not allow empty file extensions.
98: * We chose to make our implementation compatible with GNU sort
99: * implementation. If they will ever fix their bug, this code
100: * must be uncommented. Or they may choose to fix the info page,
101: * then the code stays commented.
102: *
103: if (expect_alpha)
104: sfx = false;
105: */
106:
107: if (!sfx)
108: *len = clen;
109: }
110:
111: static inline int
112: cmp_chars(wchar_t c1, wchar_t c2)
113: {
114: if (c1 == c2)
115: return 0;
116:
117: if (c1 == L'~')
118: return -1;
119: if (c2 == L'~')
120: return 1;
121:
122: if (isdigit_clocale(c1) || !c1)
123: return (isdigit_clocale(c2) || !c2) ? 0 : -1;
124:
125: if (isdigit_clocale(c2) || !c2)
126: return 1;
127:
128: if (isalpha_clocale(c1))
129: return isalpha_clocale(c2) ? (int)c1 - (int)c2 : -1;
130:
131: if (isalpha_clocale(c2))
132: return 1;
133:
134: return (int)c1 - (int)c2;
135: }
136:
137: static int
138: cmpversions(bwstring_iterator si1, bwstring_iterator se1,
139: bwstring_iterator si2, bwstring_iterator se2)
140: {
141: int cmp, diff;
142:
143: while ((si1 < se1) || (si2 < se2)) {
144: diff = 0;
145:
146: while (((si1 < se1) &&
147: !isdigit_clocale(bws_get_iter_value(si1))) ||
148: ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
149: wchar_t c1, c2;
150:
151: c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
152: c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
153:
154: cmp = cmp_chars(c1, c2);
155: if (cmp)
156: return cmp;
157:
158: if (si1 < se1)
159: si1 = bws_iterator_inc(si1, 1);
160: if (si2 < se2)
161: si2 = bws_iterator_inc(si2, 1);
162: }
163:
164: while (bws_get_iter_value(si1) == L'0')
165: si1 = bws_iterator_inc(si1, 1);
166:
167: while (bws_get_iter_value(si2) == L'0')
168: si2 = bws_iterator_inc(si2, 1);
169:
170: while (isdigit_clocale(bws_get_iter_value(si1)) &&
171: isdigit_clocale(bws_get_iter_value(si2))) {
172: if (!diff)
173: diff = ((int)bws_get_iter_value(si1) -
174: (int)bws_get_iter_value(si2));
175: si1 = bws_iterator_inc(si1, 1);
176: si2 = bws_iterator_inc(si2, 1);
177: }
178:
179: if (isdigit_clocale(bws_get_iter_value(si1)))
180: return 1;
181:
182: if (isdigit_clocale(bws_get_iter_value(si2)))
183: return -1;
184:
185: if (diff)
186: return diff;
187: }
188:
189: return 0;
190: }
191:
192: /*
193: * Compare two version strings
194: */
195: int
196: vcmp(struct bwstring *s1, struct bwstring *s2)
197: {
198: bwstring_iterator si1, si2;
199: wchar_t c1, c2;
200: size_t len1, len2, slen1, slen2;
201: int cmp_bytes, cmp_res;
202:
203: if (s1 == s2)
204: return 0;
205:
206: cmp_bytes = bwscmp(s1, s2, 0);
207: if (cmp_bytes == 0)
208: return 0;
209:
210: len1 = slen1 = BWSLEN(s1);
211: len2 = slen2 = BWSLEN(s2);
212:
213: if (slen1 < 1)
214: return -1;
215: if (slen2 < 1)
216: return 1;
217:
218: si1 = bws_begin(s1);
219: si2 = bws_begin(s2);
220:
221: c1 = bws_get_iter_value(si1);
222: c2 = bws_get_iter_value(si2);
223:
224: if (c1 == L'.' && (slen1 == 1))
225: return -1;
226:
227: if (c2 == L'.' && (slen2 == 1))
228: return 1;
229:
230: if (slen1 == 2 && c1 == L'.' &&
231: bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
232: return -1;
233: if (slen2 == 2 && c2 == L'.' &&
234: bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
235: return 1;
236:
237: if (c1 == L'.' && c2 != L'.')
238: return -1;
239: if (c1 != L'.' && c2 == L'.')
240: return 1;
241:
242: if (c1 == L'.' && c2 == L'.') {
243: si1 = bws_iterator_inc(si1, 1);
244: si2 = bws_iterator_inc(si2, 1);
245: }
246:
247: find_suffix(si1, bws_end(s1), &len1);
248: find_suffix(si2, bws_end(s2), &len2);
249:
250: if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
251: return cmp_bytes;
252:
253: cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
254: bws_iterator_inc(si2, len2));
255:
256: if (cmp_res == 0)
257: cmp_res = cmp_bytes;
258:
259: return cmp_res;
260: }