Annotation of src/usr.bin/sort/bwstring.c, Revision 1.4
1.4 ! millert 1: /* $OpenBSD: bwstring.c,v 1.3 2015/03/18 22:53:27 millert Exp $ */
1.1 millert 2:
3: /*-
4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27: * SUCH DAMAGE.
28: */
29:
30: #include <ctype.h>
31: #include <errno.h>
32: #include <err.h>
33: #include <langinfo.h>
34: #include <math.h>
35: #include <stdlib.h>
36: #include <string.h>
37: #include <wchar.h>
38: #include <wctype.h>
39:
40: #include "bwstring.h"
41: #include "sort.h"
42:
/*
 * When set, bwscoll() compares single-byte strings with memcmp()
 * instead of strcoll().
 */
43: bool byte_sort;
/*
 * Selects the string representation used throughout this file:
 * 1 means the byte array (data.cstr), anything else means the wide
 * array (data.wstr).
 * NOTE(review): presumably set from MB_CUR_MAX by the caller - confirm.
 */
44: size_t sort_mb_cur_max = 1;
45:
/*
 * Upper-cased abbreviated month names, 12 entries each, built by
 * initialise_months().  Only the table matching sort_mb_cur_max is
 * filled in; an entry is NULL when the name was empty or unusable.
 */
46: static wchar_t **wmonths;
47: static char **cmonths;
48:
49: /* initialise months */
50:
51: void
52: initialise_months(void)
53: {
54: const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
55: ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
56: ABMON_11, ABMON_12 };
57: char *tmp;
58: size_t len;
59:
60: if (sort_mb_cur_max == 1) {
61: if (cmonths == NULL) {
62: char *m;
63: unsigned int j;
64: int i;
65:
66: cmonths = sort_malloc(sizeof(char *) * 12);
67: for (i = 0; i < 12; i++) {
68: cmonths[i] = NULL;
69: tmp = nl_langinfo(item[i]);
70: if (debug_sort)
71: printf("month[%d]=%s\n", i, tmp);
1.3 millert 72: if (*tmp == '\0')
1.1 millert 73: continue;
1.3 millert 74: m = sort_strdup(tmp);
1.4 ! millert 75: len = strlen(tmp);
1.1 millert 76: for (j = 0; j < len; j++)
77: m[j] = toupper(m[j]);
78: cmonths[i] = m;
79: }
80: }
81: } else {
82: if (wmonths == NULL) {
83: unsigned int j;
84: wchar_t *m;
85: int i;
86:
87: wmonths = sort_malloc(sizeof(wchar_t *) * 12);
88: for (i = 0; i < 12; i++) {
89: wmonths[i] = NULL;
90: tmp = nl_langinfo(item[i]);
91: if (debug_sort)
92: printf("month[%d]=%s\n", i, tmp);
1.3 millert 93: if (*tmp == '\0')
94: continue;
1.1 millert 95: len = strlen(tmp);
96: m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
1.3 millert 97: if (mbstowcs(m, tmp, len) == (size_t)-1) {
98: sort_free(m);
1.1 millert 99: continue;
1.3 millert 100: }
1.1 millert 101: m[len] = L'\0';
102: for (j = 0; j < len; j++)
103: m[j] = towupper(m[j]);
104: wmonths[i] = m;
105: }
106: }
107: }
108: }
109:
/*
 * Collate two NUL-terminated wide strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ the comparison is
 * repeated with wcscmp(), and should that leave errno set as well the
 * strings are compared element by element.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	int order;

	errno = 0;
	order = wcscoll(s1, s2);
	if (errno != EILSEQ)
		return order;

	errno = 0;
	order = wcscmp(s1, s2);
	if (errno == 0)
		return order;

	/* Last resort: plain element-wise comparison. */
	for (;; ++s1, ++s2) {
		if (*s1 == L'\0')
			return (*s2 == L'\0') ? 0 : -1;
		if (*s2 == L'\0')
			return 1;
		if (*s1 != *s2)
			return (int)*s1 - (int)*s2;
	}
}
140:
141: /* counterparts of wcs functions */
142:
143: void
144: bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
145: {
146:
147: if (sort_mb_cur_max == 1)
148: fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
149: else
150: fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
151: }
152:
153: const void *
154: bwsrawdata(const struct bwstring *bws)
155: {
156:
157: return &(bws->data);
158: }
159:
160: size_t
161: bwsrawlen(const struct bwstring *bws)
162: {
163:
164: return (sort_mb_cur_max == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len);
165: }
166:
167: size_t
168: bws_memsize(const struct bwstring *bws)
169: {
170:
171: return (sort_mb_cur_max == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
172: (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring));
173: }
174:
175: void
176: bws_setlen(struct bwstring *bws, size_t newlen)
177: {
178:
179: if (bws && newlen != bws->len && newlen <= bws->len) {
180: bws->len = newlen;
181: if (sort_mb_cur_max == 1)
182: bws->data.cstr[newlen] = '\0';
183: else
184: bws->data.wstr[newlen] = L'\0';
185: }
186: }
187:
188: /*
189: * Allocate a new binary string of specified size
190: */
191: struct bwstring *
192: bwsalloc(size_t sz)
193: {
194: struct bwstring *ret;
195:
196: if (sort_mb_cur_max == 1) {
197: ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
198: ret->data.cstr[sz] = '\0';
199: } else {
200: ret = sort_malloc(sizeof(struct bwstring) +
201: SIZEOF_WCHAR_STRING(sz + 1));
202: ret->data.wstr[sz] = L'\0';
203: }
204: ret->len = sz;
205:
206: return ret;
207: }
208:
209: /*
210: * Create a copy of binary string.
211: * New string size equals the length of the old string.
212: */
213: struct bwstring *
214: bwsdup(const struct bwstring *s)
215: {
216: struct bwstring *ret;
217:
218: if (s == NULL)
219: return NULL;
220:
221: ret = bwsalloc(s->len);
222:
223: if (sort_mb_cur_max == 1)
224: memcpy(ret->data.cstr, s->data.cstr, s->len);
225: else
226: memcpy(ret->data.wstr, s->data.wstr,
227: SIZEOF_WCHAR_STRING(s->len));
228:
229: return ret;
230: }
231:
232: /*
233: * Create a new binary string from a raw binary buffer.
234: */
235: struct bwstring *
236: bwssbdup(const wchar_t *str, size_t len)
237: {
238:
239: if (str == NULL)
240: return (len == 0) ? bwsalloc(0) : NULL;
241: else {
242: struct bwstring *ret;
243: size_t i;
244:
245: ret = bwsalloc(len);
246:
247: if (sort_mb_cur_max == 1)
248: for (i = 0; i < len; ++i)
249: ret->data.cstr[i] = (unsigned char) str[i];
250: else
251: memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len));
252:
253: return ret;
254: }
255: }
256:
257: /*
258: * Create a new binary string from a raw binary buffer.
259: */
260: struct bwstring *
261: bwscsbdup(const unsigned char *str, size_t len)
262: {
263: struct bwstring *ret;
264:
265: ret = bwsalloc(len);
266:
267: if (str) {
268: if (sort_mb_cur_max == 1)
269: memcpy(ret->data.cstr, str, len);
270: else {
271: mbstate_t mbs;
272: const char *s;
273: size_t charlen, chars, cptr;
274:
275: chars = 0;
276: cptr = 0;
277: s = (const char *) str;
278:
279: memset(&mbs, 0, sizeof(mbs));
280:
281: while (cptr < len) {
282: size_t n = sort_mb_cur_max;
283:
284: if (n > len - cptr)
285: n = len - cptr;
286: charlen = mbrlen(s + cptr, n, &mbs);
287: switch (charlen) {
288: case 0:
289: /* FALLTHROUGH */
290: case (size_t) -1:
291: /* FALLTHROUGH */
292: case (size_t) -2:
293: ret->data.wstr[chars++] =
294: (unsigned char) s[cptr];
295: ++cptr;
296: break;
297: default:
298: n = mbrtowc(ret->data.wstr + (chars++),
299: s + cptr, charlen, &mbs);
300: if ((n == (size_t)-1) || (n == (size_t)-2))
301: /* NOTREACHED */
302: err(2, "mbrtowc error");
303: cptr += charlen;
304: };
305: }
306:
307: ret->len = chars;
308: ret->data.wstr[ret->len] = L'\0';
309: }
310: }
311: return ret;
312: }
313:
314: /*
 * De-allocate object memory.
 * The whole string object is passed to sort_free() in one piece.
 */
317: void
318: bwsfree(struct bwstring *s)
319: {
320:
321: sort_free(s);
322: }
323:
324: /*
325: * Copy content of src binary string to dst.
326: * If the capacity of the dst string is not sufficient,
327: * then the data is truncated.
328: */
329: size_t
330: bwscpy(struct bwstring *dst, const struct bwstring *src)
331: {
332: size_t nums = src->len;
333:
334: if (nums > dst->len)
335: nums = dst->len;
336: dst->len = nums;
337:
338: if (sort_mb_cur_max == 1) {
339: memcpy(dst->data.cstr, src->data.cstr, nums);
340: dst->data.cstr[dst->len] = '\0';
341: } else {
342: memcpy(dst->data.wstr, src->data.wstr,
343: SIZEOF_WCHAR_STRING(nums + 1));
344: dst->data.wstr[dst->len] = L'\0';
345: }
346:
347: return nums;
348: }
349:
350: /*
351: * Copy content of src binary string to dst,
352: * with specified number of symbols to be copied.
353: * If the capacity of the dst string is not sufficient,
354: * then the data is truncated.
355: */
356: struct bwstring *
357: bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
358: {
359: size_t nums = src->len;
360:
361: if (nums > dst->len)
362: nums = dst->len;
363: if (nums > size)
364: nums = size;
365: dst->len = nums;
366:
367: if (sort_mb_cur_max == 1) {
368: memcpy(dst->data.cstr, src->data.cstr, nums);
369: dst->data.cstr[dst->len] = '\0';
370: } else {
371: memcpy(dst->data.wstr, src->data.wstr,
372: SIZEOF_WCHAR_STRING(nums + 1));
373: dst->data.wstr[dst->len] = L'\0';
374: }
375:
376: return dst;
377: }
378:
379: /*
380: * Copy content of src binary string to dst,
381: * with specified number of symbols to be copied.
382: * An offset value can be specified, from the start of src string.
383: * If the capacity of the dst string is not sufficient,
384: * then the data is truncated.
385: */
386: struct bwstring *
387: bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
388: size_t size)
389: {
390:
391: if (offset >= src->len) {
392: dst->data.wstr[0] = 0;
393: dst->len = 0;
394: } else {
395: size_t nums = src->len - offset;
396:
397: if (nums > dst->len)
398: nums = dst->len;
399: if (nums > size)
400: nums = size;
401: dst->len = nums;
402: if (sort_mb_cur_max == 1) {
403: memcpy(dst->data.cstr, src->data.cstr + offset,
404: (nums));
405: dst->data.cstr[dst->len] = '\0';
406: } else {
407: memcpy(dst->data.wstr, src->data.wstr + offset,
408: SIZEOF_WCHAR_STRING(nums));
409: dst->data.wstr[dst->len] = L'\0';
410: }
411: }
412: return dst;
413: }
414:
415: /*
416: * Write binary string to the file.
417: * The output is ended either with '\n' (nl == true)
418: * or '\0' (nl == false).
419: */
420: size_t
421: bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
422: {
423:
424: if (sort_mb_cur_max == 1) {
425: size_t len = bws->len;
426:
427: if (!zero_ended) {
428: bws->data.cstr[len] = '\n';
429:
430: if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
431: err(2, NULL);
432:
433: bws->data.cstr[len] = '\0';
434: } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
435: err(2, NULL);
436:
437: return len + 1;
438:
439: } else {
440: wchar_t eols;
441: size_t printed = 0;
442:
443: eols = zero_ended ? btowc('\0') : btowc('\n');
444:
445: while (printed < BWSLEN(bws)) {
446: const wchar_t *s = bws->data.wstr + printed;
447:
448: if (*s == L'\0') {
449: int nums;
450:
451: nums = fwprintf(f, L"%lc", *s);
452:
453: if (nums != 1)
454: err(2, NULL);
455: ++printed;
456: } else {
457: int nums;
458:
459: nums = fwprintf(f, L"%ls", s);
460:
461: if (nums < 1)
462: err(2, NULL);
463: printed += nums;
464: }
465: }
466: fwprintf(f, L"%lc", eols);
467: return printed + 1;
468: }
469: }
470:
471: /*
472: * Allocate and read a binary string from file.
473: * The strings are nl-ended or zero-ended, depending on the sort setting.
474: */
475: struct bwstring *
476: bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
477: {
478: wint_t eols;
479:
480: eols = zero_ended ? btowc('\0') : btowc('\n');
481:
482: if (!zero_ended && (sort_mb_cur_max > 1)) {
483: wchar_t *ret;
484:
485: ret = fgetwln(f, len);
486:
487: if (ret == NULL) {
488: if (!feof(f))
489: err(2, NULL);
490: return NULL;
491: }
492: if (*len > 0) {
493: if (ret[*len - 1] == (wchar_t)eols)
494: --(*len);
495: }
496: return bwssbdup(ret, *len);
497:
498: } else if (!zero_ended && (sort_mb_cur_max == 1)) {
499: char *ret;
500:
501: ret = fgetln(f, len);
502:
503: if (ret == NULL) {
504: if (!feof(f))
505: err(2, NULL);
506: return NULL;
507: }
508: if (*len > 0) {
509: if (ret[*len - 1] == '\n')
510: --(*len);
511: }
512: return bwscsbdup((unsigned char *)ret, *len);
513:
514: } else {
515: *len = 0;
516:
517: if (feof(f))
518: return NULL;
519:
520: if (2 >= rb->fgetwln_z_buffer_size) {
521: rb->fgetwln_z_buffer_size += 256;
522: rb->fgetwln_z_buffer =
523: sort_reallocarray(rb->fgetwln_z_buffer,
524: rb->fgetwln_z_buffer_size, sizeof(wchar_t));
525: }
526: rb->fgetwln_z_buffer[*len] = 0;
527:
528: if (sort_mb_cur_max == 1)
529: while (!feof(f)) {
530: int c;
531:
532: c = fgetc(f);
533:
534: if (c == EOF) {
535: if (*len == 0)
536: return NULL;
537: goto line_read_done;
538: }
539: if (c == eols)
540: goto line_read_done;
541:
542: if (*len + 1 >= rb->fgetwln_z_buffer_size) {
543: rb->fgetwln_z_buffer_size += 256;
544: rb->fgetwln_z_buffer =
545: sort_reallocarray(rb->fgetwln_z_buffer,
546: rb->fgetwln_z_buffer_size, sizeof(wchar_t));
547: }
548:
549: rb->fgetwln_z_buffer[*len] = c;
550: rb->fgetwln_z_buffer[++(*len)] = 0;
551: }
552: else
553: while (!feof(f)) {
554: wint_t c = 0;
555:
556: c = fgetwc(f);
557:
558: if (c == WEOF) {
559: if (*len == 0)
560: return NULL;
561: goto line_read_done;
562: }
563: if (c == eols)
564: goto line_read_done;
565:
566: if (*len + 1 >= rb->fgetwln_z_buffer_size) {
567: rb->fgetwln_z_buffer_size += 256;
568: rb->fgetwln_z_buffer =
569: sort_reallocarray(rb->fgetwln_z_buffer,
570: rb->fgetwln_z_buffer_size, sizeof(wchar_t));
571: }
572:
573: rb->fgetwln_z_buffer[*len] = c;
574: rb->fgetwln_z_buffer[++(*len)] = 0;
575: }
576:
577: line_read_done:
578: /* we do not count the last 0 */
579: return bwssbdup(rb->fgetwln_z_buffer, *len);
580: }
581: }
582:
583: int
584: bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
585: size_t offset, size_t len)
586: {
587: size_t cmp_len, len1, len2;
588: int res = 0;
589:
590: len1 = bws1->len;
591: len2 = bws2->len;
592:
593: if (len1 <= offset) {
594: return (len2 <= offset) ? 0 : -1;
595: } else {
596: if (len2 <= offset)
597: return 1;
598: else {
599: len1 -= offset;
600: len2 -= offset;
601:
602: cmp_len = len1;
603:
604: if (len2 < cmp_len)
605: cmp_len = len2;
606:
607: if (len < cmp_len)
608: cmp_len = len;
609:
610: if (sort_mb_cur_max == 1) {
611: const unsigned char *s1, *s2;
612:
613: s1 = bws1->data.cstr + offset;
614: s2 = bws2->data.cstr + offset;
615:
616: res = memcmp(s1, s2, cmp_len);
617:
618: } else {
619: const wchar_t *s1, *s2;
620:
621: s1 = bws1->data.wstr + offset;
622: s2 = bws2->data.wstr + offset;
623:
624: res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
625: }
626: }
627: }
628:
629: if (res == 0) {
630: if (len1 < cmp_len && len1 < len2)
631: res = -1;
632: else if (len2 < cmp_len && len2 < len1)
633: res = +1;
634: }
635:
636: return res;
637: }
638:
639: int
640: bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
641: {
642: size_t len1, len2, cmp_len;
643: int res;
644:
645: len1 = bws1->len;
646: len2 = bws2->len;
647:
648: len1 -= offset;
649: len2 -= offset;
650:
651: cmp_len = len1;
652:
653: if (len2 < cmp_len)
654: cmp_len = len2;
655:
656: res = bwsncmp(bws1, bws2, offset, cmp_len);
657:
658: if (res == 0) {
659: if (len1 < len2)
660: res = -1;
661: else if (len2 < len1)
662: res = +1;
663: }
664:
665: return res;
666: }
667:
668: int
669: bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
670: {
671: wchar_t c1, c2;
672: size_t i = 0;
673:
674: for (i = 0; i < len; ++i) {
675: c1 = bws_get_iter_value(iter1);
676: c2 = bws_get_iter_value(iter2);
677: if (c1 != c2)
678: return c1 - c2;
679: iter1 = bws_iterator_inc(iter1, 1);
680: iter2 = bws_iterator_inc(iter2, 1);
681: }
682:
683: return 0;
684: }
685:
/*
 * Collate two strings starting offset symbols into each.
 *
 * A string that ends at or before offset sorts first (two such strings
 * are equal).  In single-byte mode with byte_sort set, the remainders
 * are compared with memcmp() and the shorter one sorts first on a tie.
 * Otherwise the remainders are walked as a series of NUL-separated
 * segments: each pair of segments is collated with strcoll() (or
 * wide_str_coll() in wide mode), an embedded NUL sorts before any other
 * symbol, and if the common part is equal the shorter string sorts
 * first.
 */
686: int
687: bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
688: {
689: size_t len1, len2;
690:
691: len1 = bws1->len;
692: len2 = bws2->len;
693:
/* Nothing left of one or both strings at this offset. */
694: if (len1 <= offset)
695: return (len2 <= offset) ? 0 : -1;
696: else {
697: if (len2 <= offset)
698: return 1;
699: else {
/* From here on len1/len2 are the lengths of the remainders. */
700: len1 -= offset;
701: len2 -= offset;
702:
703: if (sort_mb_cur_max == 1) {
704: const unsigned char *s1, *s2;
705:
706: s1 = bws1->data.cstr + offset;
707: s2 = bws2->data.cstr + offset;
708:
709: if (byte_sort) {
710: int res = 0;
711:
/* Compare the common prefix; on a tie the longer string is greater. */
712: if (len1 > len2) {
713: res = memcmp(s1, s2, len2);
714: if (!res)
715: res = +1;
716: } else if (len1 < len2) {
717: res = memcmp(s1, s2, len1);
718: if (!res)
719: res = -1;
720: } else
721: res = memcmp(s1, s2, len1);
722:
723: return res;
724:
725: } else {
726: int res = 0;
727: size_t i, maxlen;
728:
729: i = 0;
730: maxlen = len1;
731:
/* maxlen is the length both remainders have in common. */
732: if (maxlen > len2)
733: maxlen = len2;
734:
735: while (i < maxlen) {
736: /* goto next non-zero part: */
737: while ((i < maxlen) &&
738: !s1[i] && !s2[i])
739: ++i;
740:
741: if (i >= maxlen)
742: break;
743:
/* Exactly one side has a NUL here: that side sorts first. */
744: if (s1[i] == 0) {
745: if (s2[i] == 0)
746: /* NOTREACHED */
747: err(2, "bwscoll error 01");
748: else
749: return -1;
750: } else if (s2[i] == 0)
751: return 1;
752:
/* Collate the segments that run up to the next NUL on each side. */
753: res = strcoll((const char *)(s1 + i), (const char *)(s2 + i));
754: if (res)
755: return res;
756:
/* Segments collate equal: advance to where the shorter one ends. */
757: while ((i < maxlen) &&
758: s1[i] && s2[i])
759: ++i;
760:
761: if (i >= maxlen)
762: break;
763:
764: if (s1[i] == 0) {
765: if (s2[i] == 0) {
/* Both segments end here; step over the NUL and go on. */
766: ++i;
767: continue;
768: } else
769: return -1;
770: } else if (s2[i] == 0)
771: return 1;
772: else
773: /* NOTREACHED */
774: err(2, "bwscoll error 02");
775: }
776:
/* Common part is equal: the shorter remainder sorts first. */
777: if (len1 < len2)
778: return -1;
779: else if (len1 > len2)
780: return 1;
781:
782: return 0;
783: }
784: } else {
/* Wide mode: same segment walk as above, collating with wide_str_coll(). */
785: const wchar_t *s1, *s2;
786: size_t i, maxlen;
787: int res = 0;
788:
789: s1 = bws1->data.wstr + offset;
790: s2 = bws2->data.wstr + offset;
791:
792: i = 0;
793: maxlen = len1;
794:
795: if (maxlen > len2)
796: maxlen = len2;
797:
798: while (i < maxlen) {
799:
800: /* goto next non-zero part: */
801: while ((i < maxlen) &&
802: !s1[i] && !s2[i])
803: ++i;
804:
805: if (i >= maxlen)
806: break;
807:
808: if (s1[i] == 0) {
809: if (s2[i] == 0)
810: /* NOTREACHED */
811: err(2, "bwscoll error 1");
812: else
813: return -1;
814: } else if (s2[i] == 0)
815: return 1;
816:
817: res = wide_str_coll(s1 + i, s2 + i);
818: if (res)
819: return res;
820:
821: while ((i < maxlen) && s1[i] && s2[i])
822: ++i;
823:
824: if (i >= maxlen)
825: break;
826:
827: if (s1[i] == 0) {
828: if (s2[i] == 0) {
829: ++i;
830: continue;
831: } else
832: return -1;
833: } else if (s2[i] == 0)
834: return 1;
835: else
836: /* NOTREACHED */
837: err(2, "bwscoll error 2");
838: }
839:
/* Common part is equal: the shorter remainder sorts first. */
840: if (len1 == len2)
841: return 0;
842: return len1 < len2 ? -1 : 1;
843: }
844: }
845: }
846: }
847:
848: /*
849: * Correction of the system API
850: */
851: double
852: bwstod(struct bwstring *s0, bool *empty)
853: {
854: double ret = 0;
855:
856: if (sort_mb_cur_max == 1) {
857: char *ep, *end, *s;
858:
859: s = (char *)s0->data.cstr;
860: end = s + s0->len;
861: ep = NULL;
862:
863: while (isblank((unsigned char)*s) && s < end)
864: ++s;
865:
866: if (!isprint((unsigned char)*s)) {
867: *empty = true;
868: return 0;
869: }
870:
871: ret = strtod(s, &ep);
872: if (ep == s) {
873: *empty = true;
874: return 0;
875: }
876: } else {
877: wchar_t *end, *ep, *s;
878:
879: s = s0->data.wstr;
880: end = s + s0->len;
881: ep = NULL;
882:
883: while (iswblank(*s) && s < end)
884: ++s;
885:
886: if (!iswprint(*s)) {
887: *empty = true;
888: return 0;
889: }
890:
891: ret = wcstod(s, &ep);
892: if (ep == s) {
893: *empty = true;
894: return 0;
895: }
896: }
897:
898: *empty = false;
899: return ret;
900: }
901:
902: /*
903: * A helper function for monthcoll. If a line matches
904: * a month name, it returns (number of the month - 1),
905: * while if there is no match, it just return -1.
906: */
907:
908: int
909: bws_month_score(const struct bwstring *s0)
910: {
911:
912: if (sort_mb_cur_max == 1) {
913: const char *end, *s;
914: int i;
915:
916: s = (char *)s0->data.cstr;
917: end = s + s0->len;
918:
919: while (isblank((unsigned char)*s) && s < end)
920: ++s;
921:
922: for (i = 11; i >= 0; --i) {
923: if (cmonths[i] &&
924: (s == strstr(s, cmonths[i])))
925: return i;
926: }
927: } else {
928: const wchar_t *end, *s;
929: int i;
930:
931: s = s0->data.wstr;
932: end = s + s0->len;
933:
934: while (iswblank(*s) && s < end)
935: ++s;
936:
937: for (i = 11; i >= 0; --i) {
938: if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
939: return i;
940: }
941: }
942:
943: return -1;
944: }
945:
946: /*
947: * Rips out leading blanks (-b).
948: */
949: struct bwstring *
950: ignore_leading_blanks(struct bwstring *str)
951: {
952:
953: if (sort_mb_cur_max == 1) {
954: unsigned char *dst, *end, *src;
955:
956: src = str->data.cstr;
957: dst = src;
958: end = src + str->len;
959:
960: while (src < end && isblank(*src))
961: ++src;
962:
963: if (src != dst) {
964: size_t newlen;
965:
966: newlen = BWSLEN(str) - (src - dst);
967:
968: while (src < end) {
969: *dst = *src;
970: ++dst;
971: ++src;
972: }
973: bws_setlen(str, newlen);
974: }
975: } else {
976: wchar_t *dst, *end, *src;
977:
978: src = str->data.wstr;
979: dst = src;
980: end = src + str->len;
981:
982: while (src < end && iswblank(*src))
983: ++src;
984:
985: if (src != dst) {
986:
987: size_t newlen = BWSLEN(str) - (src - dst);
988:
989: while (src < end) {
990: *dst = *src;
991: ++dst;
992: ++src;
993: }
994: bws_setlen(str, newlen);
995:
996: }
997: }
998: return str;
999: }
1000:
1001: /*
1002: * Rips out nonprinting characters (-i).
1003: */
1004: struct bwstring *
1005: ignore_nonprinting(struct bwstring *str)
1006: {
1007: size_t newlen = str->len;
1008:
1009: if (sort_mb_cur_max == 1) {
1010: unsigned char *dst, *end, *src;
1011: unsigned char c;
1012:
1013: src = str->data.cstr;
1014: dst = src;
1015: end = src + str->len;
1016:
1017: while (src < end) {
1018: c = *src;
1019: if (isprint(c)) {
1020: *dst = c;
1021: ++dst;
1022: ++src;
1023: } else {
1024: ++src;
1025: --newlen;
1026: }
1027: }
1028: } else {
1029: wchar_t *dst, *end, *src;
1030: wchar_t c;
1031:
1032: src = str->data.wstr;
1033: dst = src;
1034: end = src + str->len;
1035:
1036: while (src < end) {
1037: c = *src;
1038: if (iswprint(c)) {
1039: *dst = c;
1040: ++dst;
1041: ++src;
1042: } else {
1043: ++src;
1044: --newlen;
1045: }
1046: }
1047: }
1048: bws_setlen(str, newlen);
1049:
1050: return str;
1051: }
1052:
1053: /*
1054: * Rips out any characters that are not alphanumeric characters
1055: * nor blanks (-d).
1056: */
1057: struct bwstring *
1058: dictionary_order(struct bwstring *str)
1059: {
1060: size_t newlen = str->len;
1061:
1062: if (sort_mb_cur_max == 1) {
1063: unsigned char *dst, *end, *src;
1064: unsigned char c;
1065:
1066: src = str->data.cstr;
1067: dst = src;
1068: end = src + str->len;
1069:
1070: while (src < end) {
1071: c = *src;
1072: if (isalnum(c) || isblank(c)) {
1073: *dst = c;
1074: ++dst;
1075: ++src;
1076: } else {
1077: ++src;
1078: --newlen;
1079: }
1080: }
1081: } else {
1082: wchar_t *dst, *end, *src;
1083: wchar_t c;
1084:
1085: src = str->data.wstr;
1086: dst = src;
1087: end = src + str->len;
1088:
1089: while (src < end) {
1090: c = *src;
1091: if (iswalnum(c) || iswblank(c)) {
1092: *dst = c;
1093: ++dst;
1094: ++src;
1095: } else {
1096: ++src;
1097: --newlen;
1098: }
1099: }
1100: }
1101: bws_setlen(str, newlen);
1102:
1103: return str;
1104: }
1105:
1106: /*
1107: * Converts string to lower case(-f).
1108: */
1109: struct bwstring *
1110: ignore_case(struct bwstring *str)
1111: {
1112:
1113: if (sort_mb_cur_max == 1) {
1114: unsigned char *end, *s;
1115:
1116: s = str->data.cstr;
1117: end = s + str->len;
1118:
1119: while (s < end) {
1120: *s = toupper(*s);
1121: ++s;
1122: }
1123: } else {
1124: wchar_t *end, *s;
1125:
1126: s = str->data.wstr;
1127: end = s + str->len;
1128:
1129: while (s < end) {
1130: *s = towupper(*s);
1131: ++s;
1132: }
1133: }
1134: return str;
1135: }
1136:
1137: void
1138: bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1139: {
1140:
1141: if (sort_mb_cur_max == 1)
1142: warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
1143: else
1144: warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
1145: }