Annotation of src/usr.bin/sort/bwstring.c, Revision 1.7
1.7 ! millert 1: /* $OpenBSD: bwstring.c,v 1.6 2015/04/01 20:58:13 millert Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
30: #include <ctype.h>
31: #include <errno.h>
32: #include <err.h>
33: #include <langinfo.h>
34: #include <math.h>
35: #include <stdlib.h>
36: #include <string.h>
37: #include <wchar.h>
38: #include <wctype.h>
39:
40: #include "bwstring.h"
41: #include "sort.h"
42:
/*
 * When set, bwscoll() compares single-byte strings with memcmp()
 * instead of collating them with strcoll().
 */
bool byte_sort;

/*
 * Selects the representation used by the routines in this file:
 * 1 means the byte member (data.cstr) is used, any other value means
 * the wide-character member (data.wstr) is used.
 */
size_t sort_mb_cur_max = 1;

/* Upper-cased abbreviated month names, built by initialise_months(). */
static wchar_t **wmonths;
static char **cmonths;
48:
49: /* initialise months */
50:
51: void
52: initialise_months(void)
53: {
54: const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
55: ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
56: ABMON_11, ABMON_12 };
57: char *tmp;
58: size_t len;
59:
60: if (sort_mb_cur_max == 1) {
61: if (cmonths == NULL) {
62: char *m;
63: unsigned int j;
64: int i;
65:
66: cmonths = sort_malloc(sizeof(char *) * 12);
67: for (i = 0; i < 12; i++) {
68: cmonths[i] = NULL;
69: tmp = nl_langinfo(item[i]);
70: if (debug_sort)
71: printf("month[%d]=%s\n", i, tmp);
1.3 millert 72: if (*tmp == '\0')
1.1 millert 73: continue;
1.3 millert 74: m = sort_strdup(tmp);
1.4 millert 75: len = strlen(tmp);
1.1 millert 76: for (j = 0; j < len; j++)
77: m[j] = toupper(m[j]);
78: cmonths[i] = m;
79: }
80: }
81: } else {
82: if (wmonths == NULL) {
83: unsigned int j;
84: wchar_t *m;
85: int i;
86:
87: wmonths = sort_malloc(sizeof(wchar_t *) * 12);
88: for (i = 0; i < 12; i++) {
89: wmonths[i] = NULL;
90: tmp = nl_langinfo(item[i]);
91: if (debug_sort)
92: printf("month[%d]=%s\n", i, tmp);
1.3 millert 93: if (*tmp == '\0')
94: continue;
1.1 millert 95: len = strlen(tmp);
1.7 ! millert 96: m = sort_reallocarray(NULL, len + 1,
! 97: sizeof(wchar_t));
1.3 millert 98: if (mbstowcs(m, tmp, len) == (size_t)-1) {
99: sort_free(m);
1.1 millert 100: continue;
1.3 millert 101: }
1.1 millert 102: m[len] = L'\0';
103: for (j = 0; j < len; j++)
104: m[j] = towupper(m[j]);
105: wmonths[i] = m;
106: }
107: }
108: }
109: }
110:
/*
 * Collate two NUL-terminated wide-character strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ the comparison is
 * repeated with wcscmp(), and if that leaves errno set as well the
 * strings are compared element by element.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	int result;

	errno = 0;
	result = wcscoll(s1, s2);
	if (errno != EILSEQ)
		return result;

	errno = 0;
	result = wcscmp(s1, s2);
	if (errno == 0)
		return result;

	/* Last resort: plain element-wise comparison. */
	for (;; ++s1, ++s2) {
		if (*s1 == L'\0')
			return (*s2 == L'\0') ? 0 : -1;
		if (*s2 == L'\0')
			return 1;
		if (*s1 != *s2)
			return (int)*s1 - (int)*s2;
	}
}
141:
142: /* counterparts of wcs functions */
143:
144: void
145: bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
146: {
147: if (sort_mb_cur_max == 1)
148: fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
149: else
150: fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
151: }
152:
153: const void *
154: bwsrawdata(const struct bwstring *bws)
155: {
156: return &(bws->data);
157: }
158:
159: size_t
160: bwsrawlen(const struct bwstring *bws)
161: {
162: return (sort_mb_cur_max == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len);
163: }
164:
165: size_t
166: bws_memsize(const struct bwstring *bws)
167: {
168: return (sort_mb_cur_max == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
169: (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring));
170: }
171:
172: void
173: bws_setlen(struct bwstring *bws, size_t newlen)
174: {
175: if (bws && newlen != bws->len && newlen <= bws->len) {
176: bws->len = newlen;
177: if (sort_mb_cur_max == 1)
178: bws->data.cstr[newlen] = '\0';
179: else
180: bws->data.wstr[newlen] = L'\0';
181: }
182: }
183:
184: /*
185: * Allocate a new binary string of specified size
186: */
187: struct bwstring *
188: bwsalloc(size_t sz)
189: {
190: struct bwstring *ret;
191:
192: if (sort_mb_cur_max == 1) {
193: ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
194: ret->data.cstr[sz] = '\0';
195: } else {
196: ret = sort_malloc(sizeof(struct bwstring) +
197: SIZEOF_WCHAR_STRING(sz + 1));
198: ret->data.wstr[sz] = L'\0';
199: }
200: ret->len = sz;
201:
202: return ret;
203: }
204:
205: /*
206: * Create a copy of binary string.
207: * New string size equals the length of the old string.
208: */
209: struct bwstring *
210: bwsdup(const struct bwstring *s)
211: {
212: struct bwstring *ret;
213:
214: if (s == NULL)
215: return NULL;
216:
217: ret = bwsalloc(s->len);
218:
219: if (sort_mb_cur_max == 1)
220: memcpy(ret->data.cstr, s->data.cstr, s->len);
221: else
222: memcpy(ret->data.wstr, s->data.wstr,
223: SIZEOF_WCHAR_STRING(s->len));
224:
225: return ret;
226: }
227:
228: /*
1.5 millert 229: * Create a new binary string from a wide character buffer.
1.1 millert 230: */
231: struct bwstring *
232: bwssbdup(const wchar_t *str, size_t len)
233: {
234: if (str == NULL)
235: return (len == 0) ? bwsalloc(0) : NULL;
236: else {
237: struct bwstring *ret;
238: size_t i;
239:
240: ret = bwsalloc(len);
241:
242: if (sort_mb_cur_max == 1)
243: for (i = 0; i < len; ++i)
244: ret->data.cstr[i] = (unsigned char) str[i];
245: else
246: memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len));
247:
248: return ret;
249: }
250: }
251:
252: /*
253: * Create a new binary string from a raw binary buffer.
254: */
255: struct bwstring *
256: bwscsbdup(const unsigned char *str, size_t len)
257: {
258: struct bwstring *ret;
259:
260: ret = bwsalloc(len);
261:
262: if (str) {
263: if (sort_mb_cur_max == 1)
264: memcpy(ret->data.cstr, str, len);
265: else {
266: mbstate_t mbs;
267: const char *s;
268: size_t charlen, chars, cptr;
269:
270: chars = 0;
271: cptr = 0;
272: s = (const char *) str;
273:
274: memset(&mbs, 0, sizeof(mbs));
275:
276: while (cptr < len) {
277: size_t n = sort_mb_cur_max;
278:
279: if (n > len - cptr)
280: n = len - cptr;
281: charlen = mbrlen(s + cptr, n, &mbs);
282: switch (charlen) {
283: case 0:
284: /* FALLTHROUGH */
285: case (size_t) -1:
286: /* FALLTHROUGH */
287: case (size_t) -2:
288: ret->data.wstr[chars++] =
289: (unsigned char) s[cptr];
290: ++cptr;
291: break;
292: default:
293: n = mbrtowc(ret->data.wstr + (chars++),
294: s + cptr, charlen, &mbs);
295: if ((n == (size_t)-1) || (n == (size_t)-2))
296: /* NOTREACHED */
297: err(2, "mbrtowc error");
298: cptr += charlen;
299: };
300: }
301:
302: ret->len = chars;
303: ret->data.wstr[ret->len] = L'\0';
304: }
305: }
306: return ret;
307: }
308:
/*
 * Release a binary string by handing it to sort_free().
 */
void
bwsfree(struct bwstring *s)
{
	sort_free(s);
}
317:
318: /*
319: * Copy content of src binary string to dst.
320: * If the capacity of the dst string is not sufficient,
321: * then the data is truncated.
322: */
323: size_t
324: bwscpy(struct bwstring *dst, const struct bwstring *src)
325: {
326: size_t nums = src->len;
327:
328: if (nums > dst->len)
329: nums = dst->len;
330: dst->len = nums;
331:
332: if (sort_mb_cur_max == 1) {
333: memcpy(dst->data.cstr, src->data.cstr, nums);
334: dst->data.cstr[dst->len] = '\0';
335: } else {
336: memcpy(dst->data.wstr, src->data.wstr,
337: SIZEOF_WCHAR_STRING(nums + 1));
338: dst->data.wstr[dst->len] = L'\0';
339: }
340:
341: return nums;
342: }
343:
344: /*
345: * Copy content of src binary string to dst,
346: * with specified number of symbols to be copied.
347: * If the capacity of the dst string is not sufficient,
348: * then the data is truncated.
349: */
350: struct bwstring *
351: bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
352: {
353: size_t nums = src->len;
354:
355: if (nums > dst->len)
356: nums = dst->len;
357: if (nums > size)
358: nums = size;
359: dst->len = nums;
360:
361: if (sort_mb_cur_max == 1) {
362: memcpy(dst->data.cstr, src->data.cstr, nums);
363: dst->data.cstr[dst->len] = '\0';
364: } else {
365: memcpy(dst->data.wstr, src->data.wstr,
366: SIZEOF_WCHAR_STRING(nums + 1));
367: dst->data.wstr[dst->len] = L'\0';
368: }
369:
370: return dst;
371: }
372:
373: /*
374: * Copy content of src binary string to dst,
375: * with specified number of symbols to be copied.
376: * An offset value can be specified, from the start of src string.
377: * If the capacity of the dst string is not sufficient,
378: * then the data is truncated.
379: */
380: struct bwstring *
381: bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
382: size_t size)
383: {
384: if (offset >= src->len) {
385: dst->data.wstr[0] = 0;
386: dst->len = 0;
387: } else {
388: size_t nums = src->len - offset;
389:
390: if (nums > dst->len)
391: nums = dst->len;
392: if (nums > size)
393: nums = size;
394: dst->len = nums;
395: if (sort_mb_cur_max == 1) {
396: memcpy(dst->data.cstr, src->data.cstr + offset,
397: (nums));
398: dst->data.cstr[dst->len] = '\0';
399: } else {
400: memcpy(dst->data.wstr, src->data.wstr + offset,
401: SIZEOF_WCHAR_STRING(nums));
402: dst->data.wstr[dst->len] = L'\0';
403: }
404: }
405: return dst;
406: }
407:
408: /*
409: * Write binary string to the file.
410: * The output is ended either with '\n' (nl == true)
411: * or '\0' (nl == false).
412: */
413: size_t
414: bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
415: {
416: if (sort_mb_cur_max == 1) {
417: size_t len = bws->len;
418:
419: if (!zero_ended) {
420: bws->data.cstr[len] = '\n';
421:
422: if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
423: err(2, NULL);
424:
425: bws->data.cstr[len] = '\0';
426: } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
427: err(2, NULL);
428:
429: return len + 1;
430:
431: } else {
432: wchar_t eols;
433: size_t printed = 0;
434:
435: eols = zero_ended ? btowc('\0') : btowc('\n');
436:
437: while (printed < BWSLEN(bws)) {
438: const wchar_t *s = bws->data.wstr + printed;
439:
440: if (*s == L'\0') {
441: int nums;
442:
443: nums = fwprintf(f, L"%lc", *s);
444:
445: if (nums != 1)
446: err(2, NULL);
447: ++printed;
448: } else {
449: int nums;
450:
451: nums = fwprintf(f, L"%ls", s);
452:
453: if (nums < 1)
454: err(2, NULL);
455: printed += nums;
456: }
457: }
458: fwprintf(f, L"%lc", eols);
459: return printed + 1;
460: }
461: }
462:
463: /*
464: * Allocate and read a binary string from file.
465: * The strings are nl-ended or zero-ended, depending on the sort setting.
466: */
467: struct bwstring *
468: bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
469: {
470: wint_t eols;
471:
472: eols = zero_ended ? btowc('\0') : btowc('\n');
473:
474: if (!zero_ended && (sort_mb_cur_max > 1)) {
475: wchar_t *ret;
476:
477: ret = fgetwln(f, len);
478:
479: if (ret == NULL) {
480: if (!feof(f))
481: err(2, NULL);
482: return NULL;
483: }
484: if (*len > 0) {
485: if (ret[*len - 1] == (wchar_t)eols)
486: --(*len);
487: }
488: return bwssbdup(ret, *len);
489:
490: } else if (!zero_ended && (sort_mb_cur_max == 1)) {
491: char *ret;
492:
493: ret = fgetln(f, len);
494:
495: if (ret == NULL) {
496: if (!feof(f))
497: err(2, NULL);
498: return NULL;
499: }
500: if (*len > 0) {
501: if (ret[*len - 1] == '\n')
502: --(*len);
503: }
504: return bwscsbdup((unsigned char *)ret, *len);
505:
506: } else {
507: *len = 0;
508:
509: if (feof(f))
510: return NULL;
511:
512: if (2 >= rb->fgetwln_z_buffer_size) {
513: rb->fgetwln_z_buffer_size += 256;
514: rb->fgetwln_z_buffer =
515: sort_reallocarray(rb->fgetwln_z_buffer,
516: rb->fgetwln_z_buffer_size, sizeof(wchar_t));
517: }
518: rb->fgetwln_z_buffer[*len] = 0;
519:
1.6 millert 520: if (sort_mb_cur_max == 1) {
1.1 millert 521: while (!feof(f)) {
522: int c;
523:
524: c = fgetc(f);
525:
526: if (c == EOF) {
527: if (*len == 0)
528: return NULL;
529: goto line_read_done;
530: }
531: if (c == eols)
532: goto line_read_done;
533:
534: if (*len + 1 >= rb->fgetwln_z_buffer_size) {
535: rb->fgetwln_z_buffer_size += 256;
536: rb->fgetwln_z_buffer =
537: sort_reallocarray(rb->fgetwln_z_buffer,
538: rb->fgetwln_z_buffer_size, sizeof(wchar_t));
539: }
540:
541: rb->fgetwln_z_buffer[*len] = c;
542: rb->fgetwln_z_buffer[++(*len)] = 0;
543: }
1.6 millert 544: } else {
1.1 millert 545: while (!feof(f)) {
546: wint_t c = 0;
547:
548: c = fgetwc(f);
549:
550: if (c == WEOF) {
551: if (*len == 0)
552: return NULL;
553: goto line_read_done;
554: }
555: if (c == eols)
556: goto line_read_done;
557:
558: if (*len + 1 >= rb->fgetwln_z_buffer_size) {
559: rb->fgetwln_z_buffer_size += 256;
560: rb->fgetwln_z_buffer =
561: sort_reallocarray(rb->fgetwln_z_buffer,
562: rb->fgetwln_z_buffer_size, sizeof(wchar_t));
563: }
564:
565: rb->fgetwln_z_buffer[*len] = c;
566: rb->fgetwln_z_buffer[++(*len)] = 0;
567: }
1.6 millert 568: }
1.1 millert 569:
570: line_read_done:
571: /* we do not count the last 0 */
572: return bwssbdup(rb->fgetwln_z_buffer, *len);
573: }
574: }
575:
576: int
577: bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
578: size_t offset, size_t len)
579: {
580: size_t cmp_len, len1, len2;
581: int res = 0;
582:
583: len1 = bws1->len;
584: len2 = bws2->len;
585:
586: if (len1 <= offset) {
587: return (len2 <= offset) ? 0 : -1;
588: } else {
589: if (len2 <= offset)
590: return 1;
591: else {
592: len1 -= offset;
593: len2 -= offset;
594:
595: cmp_len = len1;
596:
597: if (len2 < cmp_len)
598: cmp_len = len2;
599:
600: if (len < cmp_len)
601: cmp_len = len;
602:
603: if (sort_mb_cur_max == 1) {
604: const unsigned char *s1, *s2;
605:
606: s1 = bws1->data.cstr + offset;
607: s2 = bws2->data.cstr + offset;
608:
609: res = memcmp(s1, s2, cmp_len);
610:
611: } else {
612: const wchar_t *s1, *s2;
613:
614: s1 = bws1->data.wstr + offset;
615: s2 = bws2->data.wstr + offset;
616:
617: res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
618: }
619: }
620: }
621:
622: if (res == 0) {
623: if (len1 < cmp_len && len1 < len2)
624: res = -1;
625: else if (len2 < cmp_len && len2 < len1)
626: res = +1;
627: }
628:
629: return res;
630: }
631:
632: int
633: bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
634: {
635: size_t len1, len2, cmp_len;
636: int res;
637:
638: len1 = bws1->len;
639: len2 = bws2->len;
640:
641: len1 -= offset;
642: len2 -= offset;
643:
644: cmp_len = len1;
645:
646: if (len2 < cmp_len)
647: cmp_len = len2;
648:
649: res = bwsncmp(bws1, bws2, offset, cmp_len);
650:
651: if (res == 0) {
652: if (len1 < len2)
653: res = -1;
654: else if (len2 < len1)
655: res = +1;
656: }
657:
658: return res;
659: }
660:
661: int
662: bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
663: {
664: wchar_t c1, c2;
665: size_t i = 0;
666:
667: for (i = 0; i < len; ++i) {
668: c1 = bws_get_iter_value(iter1);
669: c2 = bws_get_iter_value(iter2);
670: if (c1 != c2)
671: return c1 - c2;
672: iter1 = bws_iterator_inc(iter1, 1);
673: iter2 = bws_iterator_inc(iter2, 1);
674: }
675:
676: return 0;
677: }
678:
679: int
680: bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
681: {
682: size_t len1, len2;
683:
684: len1 = bws1->len;
685: len2 = bws2->len;
686:
687: if (len1 <= offset)
688: return (len2 <= offset) ? 0 : -1;
689: else {
690: if (len2 <= offset)
691: return 1;
692: else {
693: len1 -= offset;
694: len2 -= offset;
695:
696: if (sort_mb_cur_max == 1) {
697: const unsigned char *s1, *s2;
698:
699: s1 = bws1->data.cstr + offset;
700: s2 = bws2->data.cstr + offset;
701:
702: if (byte_sort) {
703: int res = 0;
704:
705: if (len1 > len2) {
706: res = memcmp(s1, s2, len2);
707: if (!res)
708: res = +1;
709: } else if (len1 < len2) {
710: res = memcmp(s1, s2, len1);
711: if (!res)
712: res = -1;
713: } else
714: res = memcmp(s1, s2, len1);
715:
716: return res;
717:
718: } else {
719: int res = 0;
720: size_t i, maxlen;
721:
722: i = 0;
723: maxlen = len1;
724:
725: if (maxlen > len2)
726: maxlen = len2;
727:
728: while (i < maxlen) {
729: /* goto next non-zero part: */
730: while ((i < maxlen) &&
731: !s1[i] && !s2[i])
732: ++i;
733:
734: if (i >= maxlen)
735: break;
736:
737: if (s1[i] == 0) {
738: if (s2[i] == 0)
739: /* NOTREACHED */
740: err(2, "bwscoll error 01");
741: else
742: return -1;
743: } else if (s2[i] == 0)
744: return 1;
745:
746: res = strcoll((const char *)(s1 + i), (const char *)(s2 + i));
747: if (res)
748: return res;
749:
750: while ((i < maxlen) &&
751: s1[i] && s2[i])
752: ++i;
753:
754: if (i >= maxlen)
755: break;
756:
757: if (s1[i] == 0) {
758: if (s2[i] == 0) {
759: ++i;
760: continue;
761: } else
762: return -1;
763: } else if (s2[i] == 0)
764: return 1;
765: else
766: /* NOTREACHED */
767: err(2, "bwscoll error 02");
768: }
769:
770: if (len1 < len2)
771: return -1;
772: else if (len1 > len2)
773: return 1;
774:
775: return 0;
776: }
777: } else {
778: const wchar_t *s1, *s2;
779: size_t i, maxlen;
780: int res = 0;
781:
782: s1 = bws1->data.wstr + offset;
783: s2 = bws2->data.wstr + offset;
784:
785: i = 0;
786: maxlen = len1;
787:
788: if (maxlen > len2)
789: maxlen = len2;
790:
791: while (i < maxlen) {
792:
793: /* goto next non-zero part: */
794: while ((i < maxlen) &&
795: !s1[i] && !s2[i])
796: ++i;
797:
798: if (i >= maxlen)
799: break;
800:
801: if (s1[i] == 0) {
802: if (s2[i] == 0)
803: /* NOTREACHED */
804: err(2, "bwscoll error 1");
805: else
806: return -1;
807: } else if (s2[i] == 0)
808: return 1;
809:
810: res = wide_str_coll(s1 + i, s2 + i);
811: if (res)
812: return res;
813:
814: while ((i < maxlen) && s1[i] && s2[i])
815: ++i;
816:
817: if (i >= maxlen)
818: break;
819:
820: if (s1[i] == 0) {
821: if (s2[i] == 0) {
822: ++i;
823: continue;
824: } else
825: return -1;
826: } else if (s2[i] == 0)
827: return 1;
828: else
829: /* NOTREACHED */
830: err(2, "bwscoll error 2");
831: }
832:
833: if (len1 == len2)
834: return 0;
835: return len1 < len2 ? -1 : 1;
836: }
837: }
838: }
839: }
840:
841: /*
842: * Correction of the system API
843: */
844: double
845: bwstod(struct bwstring *s0, bool *empty)
846: {
847: double ret = 0;
848:
849: if (sort_mb_cur_max == 1) {
850: char *ep, *end, *s;
851:
852: s = (char *)s0->data.cstr;
853: end = s + s0->len;
854: ep = NULL;
855:
856: while (isblank((unsigned char)*s) && s < end)
857: ++s;
858:
859: if (!isprint((unsigned char)*s)) {
860: *empty = true;
861: return 0;
862: }
863:
864: ret = strtod(s, &ep);
865: if (ep == s) {
866: *empty = true;
867: return 0;
868: }
869: } else {
870: wchar_t *end, *ep, *s;
871:
872: s = s0->data.wstr;
873: end = s + s0->len;
874: ep = NULL;
875:
876: while (iswblank(*s) && s < end)
877: ++s;
878:
879: if (!iswprint(*s)) {
880: *empty = true;
881: return 0;
882: }
883:
884: ret = wcstod(s, &ep);
885: if (ep == s) {
886: *empty = true;
887: return 0;
888: }
889: }
890:
891: *empty = false;
892: return ret;
893: }
894:
895: /*
896: * A helper function for monthcoll. If a line matches
897: * a month name, it returns (number of the month - 1),
898: * while if there is no match, it just return -1.
899: */
900: int
901: bws_month_score(const struct bwstring *s0)
902: {
903: if (sort_mb_cur_max == 1) {
904: const char *end, *s;
905: int i;
906:
907: s = (char *)s0->data.cstr;
908: end = s + s0->len;
909:
910: while (isblank((unsigned char)*s) && s < end)
911: ++s;
912:
913: for (i = 11; i >= 0; --i) {
914: if (cmonths[i] &&
915: (s == strstr(s, cmonths[i])))
916: return i;
917: }
918: } else {
919: const wchar_t *end, *s;
920: int i;
921:
922: s = s0->data.wstr;
923: end = s + s0->len;
924:
925: while (iswblank(*s) && s < end)
926: ++s;
927:
928: for (i = 11; i >= 0; --i) {
929: if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
930: return i;
931: }
932: }
933:
934: return -1;
935: }
936:
937: /*
938: * Rips out leading blanks (-b).
939: */
940: struct bwstring *
941: ignore_leading_blanks(struct bwstring *str)
942: {
943: if (sort_mb_cur_max == 1) {
944: unsigned char *dst, *end, *src;
945:
946: src = str->data.cstr;
947: dst = src;
948: end = src + str->len;
949:
950: while (src < end && isblank(*src))
951: ++src;
952:
953: if (src != dst) {
954: size_t newlen;
955:
956: newlen = BWSLEN(str) - (src - dst);
957:
958: while (src < end) {
959: *dst = *src;
960: ++dst;
961: ++src;
962: }
963: bws_setlen(str, newlen);
964: }
965: } else {
966: wchar_t *dst, *end, *src;
967:
968: src = str->data.wstr;
969: dst = src;
970: end = src + str->len;
971:
972: while (src < end && iswblank(*src))
973: ++src;
974:
975: if (src != dst) {
976:
977: size_t newlen = BWSLEN(str) - (src - dst);
978:
979: while (src < end) {
980: *dst = *src;
981: ++dst;
982: ++src;
983: }
984: bws_setlen(str, newlen);
985:
986: }
987: }
988: return str;
989: }
990:
991: /*
992: * Rips out nonprinting characters (-i).
993: */
994: struct bwstring *
995: ignore_nonprinting(struct bwstring *str)
996: {
997: size_t newlen = str->len;
998:
999: if (sort_mb_cur_max == 1) {
1000: unsigned char *dst, *end, *src;
1001: unsigned char c;
1002:
1003: src = str->data.cstr;
1004: dst = src;
1005: end = src + str->len;
1006:
1007: while (src < end) {
1008: c = *src;
1009: if (isprint(c)) {
1010: *dst = c;
1011: ++dst;
1012: ++src;
1013: } else {
1014: ++src;
1015: --newlen;
1016: }
1017: }
1018: } else {
1019: wchar_t *dst, *end, *src;
1020: wchar_t c;
1021:
1022: src = str->data.wstr;
1023: dst = src;
1024: end = src + str->len;
1025:
1026: while (src < end) {
1027: c = *src;
1028: if (iswprint(c)) {
1029: *dst = c;
1030: ++dst;
1031: ++src;
1032: } else {
1033: ++src;
1034: --newlen;
1035: }
1036: }
1037: }
1038: bws_setlen(str, newlen);
1039:
1040: return str;
1041: }
1042:
1043: /*
1044: * Rips out any characters that are not alphanumeric characters
1045: * nor blanks (-d).
1046: */
1047: struct bwstring *
1048: dictionary_order(struct bwstring *str)
1049: {
1050: size_t newlen = str->len;
1051:
1052: if (sort_mb_cur_max == 1) {
1053: unsigned char *dst, *end, *src;
1054: unsigned char c;
1055:
1056: src = str->data.cstr;
1057: dst = src;
1058: end = src + str->len;
1059:
1060: while (src < end) {
1061: c = *src;
1062: if (isalnum(c) || isblank(c)) {
1063: *dst = c;
1064: ++dst;
1065: ++src;
1066: } else {
1067: ++src;
1068: --newlen;
1069: }
1070: }
1071: } else {
1072: wchar_t *dst, *end, *src;
1073: wchar_t c;
1074:
1075: src = str->data.wstr;
1076: dst = src;
1077: end = src + str->len;
1078:
1079: while (src < end) {
1080: c = *src;
1081: if (iswalnum(c) || iswblank(c)) {
1082: *dst = c;
1083: ++dst;
1084: ++src;
1085: } else {
1086: ++src;
1087: --newlen;
1088: }
1089: }
1090: }
1091: bws_setlen(str, newlen);
1092:
1093: return str;
1094: }
1095:
1096: /*
1097: * Converts string to lower case(-f).
1098: */
1099: struct bwstring *
1100: ignore_case(struct bwstring *str)
1101: {
1102: if (sort_mb_cur_max == 1) {
1103: unsigned char *end, *s;
1104:
1105: s = str->data.cstr;
1106: end = s + str->len;
1107:
1108: while (s < end) {
1109: *s = toupper(*s);
1110: ++s;
1111: }
1112: } else {
1113: wchar_t *end, *s;
1114:
1115: s = str->data.wstr;
1116: end = s + str->len;
1117:
1118: while (s < end) {
1119: *s = towupper(*s);
1120: ++s;
1121: }
1122: }
1123: return str;
1124: }
1125:
1126: void
1127: bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1128: {
1129: if (sort_mb_cur_max == 1)
1130: warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
1131: else
1132: warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
1133: }