Annotation of src/usr.bin/sort/bwstring.c, Revision 1.1
1.1 ! millert 1: /* $OpenBSD$ */
! 2:
! 3: /*-
! 4: * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
! 5: * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
! 6: * All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: *
! 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
! 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
! 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 27: * SUCH DAMAGE.
! 28: */
! 29:
! 30: #include <ctype.h>
! 31: #include <errno.h>
! 32: #include <err.h>
! 33: #include <langinfo.h>
! 34: #include <math.h>
! 35: #include <stdlib.h>
! 36: #include <string.h>
! 37: #include <wchar.h>
! 38: #include <wctype.h>
! 39:
! 40: #include "bwstring.h"
! 41: #include "sort.h"
! 42:
/*
 * When true, bwscoll() orders single-byte strings with memcmp() instead
 * of strcoll().  Not assigned in this file.
 */
bool byte_sort;
/*
 * Selects the string representation used throughout this file: the
 * functions below use the cstr member when this is 1 and the wstr member
 * otherwise.
 */
size_t sort_mb_cur_max = 1;

/*
 * Upper-cased abbreviated month names, allocated on first use by
 * initialise_months() and read by bws_month_score().  Only the table
 * matching the current sort_mb_cur_max mode is filled in.
 */
static wchar_t **wmonths;
static char **cmonths;
! 48:
! 49: /* initialise months */
! 50:
! 51: void
! 52: initialise_months(void)
! 53: {
! 54: const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
! 55: ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
! 56: ABMON_11, ABMON_12 };
! 57: char *tmp;
! 58: size_t len;
! 59:
! 60: if (sort_mb_cur_max == 1) {
! 61: if (cmonths == NULL) {
! 62: char *m;
! 63: unsigned int j;
! 64: int i;
! 65:
! 66: cmonths = sort_malloc(sizeof(char *) * 12);
! 67: for (i = 0; i < 12; i++) {
! 68: cmonths[i] = NULL;
! 69: tmp = nl_langinfo(item[i]);
! 70: if (tmp == NULL)
! 71: continue;
! 72: if (debug_sort)
! 73: printf("month[%d]=%s\n", i, tmp);
! 74: len = strlen(tmp);
! 75: if (len < 1)
! 76: continue;
! 77: while (isblank((unsigned char)*tmp))
! 78: ++tmp;
! 79: m = sort_malloc(len + 1);
! 80: memcpy(m, tmp, len + 1);
! 81: m[len] = '\0';
! 82: for (j = 0; j < len; j++)
! 83: m[j] = toupper(m[j]);
! 84: cmonths[i] = m;
! 85: }
! 86: }
! 87: } else {
! 88: if (wmonths == NULL) {
! 89: unsigned int j;
! 90: wchar_t *m;
! 91: int i;
! 92:
! 93: wmonths = sort_malloc(sizeof(wchar_t *) * 12);
! 94: for (i = 0; i < 12; i++) {
! 95: wmonths[i] = NULL;
! 96: tmp = nl_langinfo(item[i]);
! 97: if (tmp == NULL)
! 98: continue;
! 99: if (debug_sort)
! 100: printf("month[%d]=%s\n", i, tmp);
! 101: len = strlen(tmp);
! 102: if (len < 1)
! 103: continue;
! 104: while (isblank((unsigned char)*tmp))
! 105: ++tmp;
! 106: m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
! 107: if (mbstowcs(m, tmp, len) == (size_t)-1)
! 108: continue;
! 109: m[len] = L'\0';
! 110: for (j = 0; j < len; j++)
! 111: m[j] = towupper(m[j]);
! 112: wmonths[i] = m;
! 113: }
! 114: }
! 115: }
! 116: }
! 117:
/*
 * Collate two NUL-terminated wide-character strings.
 *
 * wcscoll() is tried first.  If it reports EILSEQ the result of wcscmp()
 * is used instead, and should that call leave errno set as well, the
 * strings are compared element by element.
 */
static int
wide_str_coll(const wchar_t *s1, const wchar_t *s2)
{
	int result;
	size_t pos;

	errno = 0;
	result = wcscoll(s1, s2);
	if (errno != EILSEQ)
		return result;

	errno = 0;
	result = wcscmp(s1, s2);
	if (errno == 0)
		return result;

	/* Last resort: plain element-wise comparison. */
	for (pos = 0; s1[pos] != L'\0'; ++pos) {
		if (s2[pos] == L'\0')
			return 1;
		if (s1[pos] != s2[pos])
			return (int)s1[pos] - (int)s2[pos];
	}
	return (s2[pos] == L'\0') ? 0 : -1;
}
! 148:
! 149: /* counterparts of wcs functions */
! 150:
! 151: void
! 152: bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
! 153: {
! 154:
! 155: if (sort_mb_cur_max == 1)
! 156: fprintf(f, "%s%s%s", prefix, bws->data.cstr, suffix);
! 157: else
! 158: fprintf(f, "%s%S%s", prefix, bws->data.wstr, suffix);
! 159: }
! 160:
! 161: const void *
! 162: bwsrawdata(const struct bwstring *bws)
! 163: {
! 164:
! 165: return &(bws->data);
! 166: }
! 167:
! 168: size_t
! 169: bwsrawlen(const struct bwstring *bws)
! 170: {
! 171:
! 172: return (sort_mb_cur_max == 1) ? bws->len : SIZEOF_WCHAR_STRING(bws->len);
! 173: }
! 174:
! 175: size_t
! 176: bws_memsize(const struct bwstring *bws)
! 177: {
! 178:
! 179: return (sort_mb_cur_max == 1) ? (bws->len + 2 + sizeof(struct bwstring)) :
! 180: (SIZEOF_WCHAR_STRING(bws->len + 1) + sizeof(struct bwstring));
! 181: }
! 182:
! 183: void
! 184: bws_setlen(struct bwstring *bws, size_t newlen)
! 185: {
! 186:
! 187: if (bws && newlen != bws->len && newlen <= bws->len) {
! 188: bws->len = newlen;
! 189: if (sort_mb_cur_max == 1)
! 190: bws->data.cstr[newlen] = '\0';
! 191: else
! 192: bws->data.wstr[newlen] = L'\0';
! 193: }
! 194: }
! 195:
! 196: /*
! 197: * Allocate a new binary string of specified size
! 198: */
! 199: struct bwstring *
! 200: bwsalloc(size_t sz)
! 201: {
! 202: struct bwstring *ret;
! 203:
! 204: if (sort_mb_cur_max == 1) {
! 205: ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
! 206: ret->data.cstr[sz] = '\0';
! 207: } else {
! 208: ret = sort_malloc(sizeof(struct bwstring) +
! 209: SIZEOF_WCHAR_STRING(sz + 1));
! 210: ret->data.wstr[sz] = L'\0';
! 211: }
! 212: ret->len = sz;
! 213:
! 214: return ret;
! 215: }
! 216:
! 217: /*
! 218: * Create a copy of binary string.
! 219: * New string size equals the length of the old string.
! 220: */
! 221: struct bwstring *
! 222: bwsdup(const struct bwstring *s)
! 223: {
! 224: struct bwstring *ret;
! 225:
! 226: if (s == NULL)
! 227: return NULL;
! 228:
! 229: ret = bwsalloc(s->len);
! 230:
! 231: if (sort_mb_cur_max == 1)
! 232: memcpy(ret->data.cstr, s->data.cstr, s->len);
! 233: else
! 234: memcpy(ret->data.wstr, s->data.wstr,
! 235: SIZEOF_WCHAR_STRING(s->len));
! 236:
! 237: return ret;
! 238: }
! 239:
! 240: /*
! 241: * Create a new binary string from a raw binary buffer.
! 242: */
! 243: struct bwstring *
! 244: bwssbdup(const wchar_t *str, size_t len)
! 245: {
! 246:
! 247: if (str == NULL)
! 248: return (len == 0) ? bwsalloc(0) : NULL;
! 249: else {
! 250: struct bwstring *ret;
! 251: size_t i;
! 252:
! 253: ret = bwsalloc(len);
! 254:
! 255: if (sort_mb_cur_max == 1)
! 256: for (i = 0; i < len; ++i)
! 257: ret->data.cstr[i] = (unsigned char) str[i];
! 258: else
! 259: memcpy(ret->data.wstr, str, SIZEOF_WCHAR_STRING(len));
! 260:
! 261: return ret;
! 262: }
! 263: }
! 264:
! 265: /*
! 266: * Create a new binary string from a raw binary buffer.
! 267: */
! 268: struct bwstring *
! 269: bwscsbdup(const unsigned char *str, size_t len)
! 270: {
! 271: struct bwstring *ret;
! 272:
! 273: ret = bwsalloc(len);
! 274:
! 275: if (str) {
! 276: if (sort_mb_cur_max == 1)
! 277: memcpy(ret->data.cstr, str, len);
! 278: else {
! 279: mbstate_t mbs;
! 280: const char *s;
! 281: size_t charlen, chars, cptr;
! 282:
! 283: chars = 0;
! 284: cptr = 0;
! 285: s = (const char *) str;
! 286:
! 287: memset(&mbs, 0, sizeof(mbs));
! 288:
! 289: while (cptr < len) {
! 290: size_t n = sort_mb_cur_max;
! 291:
! 292: if (n > len - cptr)
! 293: n = len - cptr;
! 294: charlen = mbrlen(s + cptr, n, &mbs);
! 295: switch (charlen) {
! 296: case 0:
! 297: /* FALLTHROUGH */
! 298: case (size_t) -1:
! 299: /* FALLTHROUGH */
! 300: case (size_t) -2:
! 301: ret->data.wstr[chars++] =
! 302: (unsigned char) s[cptr];
! 303: ++cptr;
! 304: break;
! 305: default:
! 306: n = mbrtowc(ret->data.wstr + (chars++),
! 307: s + cptr, charlen, &mbs);
! 308: if ((n == (size_t)-1) || (n == (size_t)-2))
! 309: /* NOTREACHED */
! 310: err(2, "mbrtowc error");
! 311: cptr += charlen;
! 312: };
! 313: }
! 314:
! 315: ret->len = chars;
! 316: ret->data.wstr[ret->len] = L'\0';
! 317: }
! 318: }
! 319: return ret;
! 320: }
! 321:
/*
 * Release a string by handing it to sort_free().
 */
void
bwsfree(struct bwstring *s)
{
	sort_free(s);
}
! 331:
! 332: /*
! 333: * Copy content of src binary string to dst.
! 334: * If the capacity of the dst string is not sufficient,
! 335: * then the data is truncated.
! 336: */
! 337: size_t
! 338: bwscpy(struct bwstring *dst, const struct bwstring *src)
! 339: {
! 340: size_t nums = src->len;
! 341:
! 342: if (nums > dst->len)
! 343: nums = dst->len;
! 344: dst->len = nums;
! 345:
! 346: if (sort_mb_cur_max == 1) {
! 347: memcpy(dst->data.cstr, src->data.cstr, nums);
! 348: dst->data.cstr[dst->len] = '\0';
! 349: } else {
! 350: memcpy(dst->data.wstr, src->data.wstr,
! 351: SIZEOF_WCHAR_STRING(nums + 1));
! 352: dst->data.wstr[dst->len] = L'\0';
! 353: }
! 354:
! 355: return nums;
! 356: }
! 357:
! 358: /*
! 359: * Copy content of src binary string to dst,
! 360: * with specified number of symbols to be copied.
! 361: * If the capacity of the dst string is not sufficient,
! 362: * then the data is truncated.
! 363: */
! 364: struct bwstring *
! 365: bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
! 366: {
! 367: size_t nums = src->len;
! 368:
! 369: if (nums > dst->len)
! 370: nums = dst->len;
! 371: if (nums > size)
! 372: nums = size;
! 373: dst->len = nums;
! 374:
! 375: if (sort_mb_cur_max == 1) {
! 376: memcpy(dst->data.cstr, src->data.cstr, nums);
! 377: dst->data.cstr[dst->len] = '\0';
! 378: } else {
! 379: memcpy(dst->data.wstr, src->data.wstr,
! 380: SIZEOF_WCHAR_STRING(nums + 1));
! 381: dst->data.wstr[dst->len] = L'\0';
! 382: }
! 383:
! 384: return dst;
! 385: }
! 386:
! 387: /*
! 388: * Copy content of src binary string to dst,
! 389: * with specified number of symbols to be copied.
! 390: * An offset value can be specified, from the start of src string.
! 391: * If the capacity of the dst string is not sufficient,
! 392: * then the data is truncated.
! 393: */
! 394: struct bwstring *
! 395: bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
! 396: size_t size)
! 397: {
! 398:
! 399: if (offset >= src->len) {
! 400: dst->data.wstr[0] = 0;
! 401: dst->len = 0;
! 402: } else {
! 403: size_t nums = src->len - offset;
! 404:
! 405: if (nums > dst->len)
! 406: nums = dst->len;
! 407: if (nums > size)
! 408: nums = size;
! 409: dst->len = nums;
! 410: if (sort_mb_cur_max == 1) {
! 411: memcpy(dst->data.cstr, src->data.cstr + offset,
! 412: (nums));
! 413: dst->data.cstr[dst->len] = '\0';
! 414: } else {
! 415: memcpy(dst->data.wstr, src->data.wstr + offset,
! 416: SIZEOF_WCHAR_STRING(nums));
! 417: dst->data.wstr[dst->len] = L'\0';
! 418: }
! 419: }
! 420: return dst;
! 421: }
! 422:
! 423: /*
! 424: * Write binary string to the file.
! 425: * The output is ended either with '\n' (nl == true)
! 426: * or '\0' (nl == false).
! 427: */
! 428: size_t
! 429: bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
! 430: {
! 431:
! 432: if (sort_mb_cur_max == 1) {
! 433: size_t len = bws->len;
! 434:
! 435: if (!zero_ended) {
! 436: bws->data.cstr[len] = '\n';
! 437:
! 438: if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
! 439: err(2, NULL);
! 440:
! 441: bws->data.cstr[len] = '\0';
! 442: } else if (fwrite(bws->data.cstr, len + 1, 1, f) < 1)
! 443: err(2, NULL);
! 444:
! 445: return len + 1;
! 446:
! 447: } else {
! 448: wchar_t eols;
! 449: size_t printed = 0;
! 450:
! 451: eols = zero_ended ? btowc('\0') : btowc('\n');
! 452:
! 453: while (printed < BWSLEN(bws)) {
! 454: const wchar_t *s = bws->data.wstr + printed;
! 455:
! 456: if (*s == L'\0') {
! 457: int nums;
! 458:
! 459: nums = fwprintf(f, L"%lc", *s);
! 460:
! 461: if (nums != 1)
! 462: err(2, NULL);
! 463: ++printed;
! 464: } else {
! 465: int nums;
! 466:
! 467: nums = fwprintf(f, L"%ls", s);
! 468:
! 469: if (nums < 1)
! 470: err(2, NULL);
! 471: printed += nums;
! 472: }
! 473: }
! 474: fwprintf(f, L"%lc", eols);
! 475: return printed + 1;
! 476: }
! 477: }
! 478:
! 479: /*
! 480: * Allocate and read a binary string from file.
! 481: * The strings are nl-ended or zero-ended, depending on the sort setting.
! 482: */
! 483: struct bwstring *
! 484: bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
! 485: {
! 486: wint_t eols;
! 487:
! 488: eols = zero_ended ? btowc('\0') : btowc('\n');
! 489:
! 490: if (!zero_ended && (sort_mb_cur_max > 1)) {
! 491: wchar_t *ret;
! 492:
! 493: ret = fgetwln(f, len);
! 494:
! 495: if (ret == NULL) {
! 496: if (!feof(f))
! 497: err(2, NULL);
! 498: return NULL;
! 499: }
! 500: if (*len > 0) {
! 501: if (ret[*len - 1] == (wchar_t)eols)
! 502: --(*len);
! 503: }
! 504: return bwssbdup(ret, *len);
! 505:
! 506: } else if (!zero_ended && (sort_mb_cur_max == 1)) {
! 507: char *ret;
! 508:
! 509: ret = fgetln(f, len);
! 510:
! 511: if (ret == NULL) {
! 512: if (!feof(f))
! 513: err(2, NULL);
! 514: return NULL;
! 515: }
! 516: if (*len > 0) {
! 517: if (ret[*len - 1] == '\n')
! 518: --(*len);
! 519: }
! 520: return bwscsbdup((unsigned char *)ret, *len);
! 521:
! 522: } else {
! 523: *len = 0;
! 524:
! 525: if (feof(f))
! 526: return NULL;
! 527:
! 528: if (2 >= rb->fgetwln_z_buffer_size) {
! 529: rb->fgetwln_z_buffer_size += 256;
! 530: rb->fgetwln_z_buffer =
! 531: sort_reallocarray(rb->fgetwln_z_buffer,
! 532: rb->fgetwln_z_buffer_size, sizeof(wchar_t));
! 533: }
! 534: rb->fgetwln_z_buffer[*len] = 0;
! 535:
! 536: if (sort_mb_cur_max == 1)
! 537: while (!feof(f)) {
! 538: int c;
! 539:
! 540: c = fgetc(f);
! 541:
! 542: if (c == EOF) {
! 543: if (*len == 0)
! 544: return NULL;
! 545: goto line_read_done;
! 546: }
! 547: if (c == eols)
! 548: goto line_read_done;
! 549:
! 550: if (*len + 1 >= rb->fgetwln_z_buffer_size) {
! 551: rb->fgetwln_z_buffer_size += 256;
! 552: rb->fgetwln_z_buffer =
! 553: sort_reallocarray(rb->fgetwln_z_buffer,
! 554: rb->fgetwln_z_buffer_size, sizeof(wchar_t));
! 555: }
! 556:
! 557: rb->fgetwln_z_buffer[*len] = c;
! 558: rb->fgetwln_z_buffer[++(*len)] = 0;
! 559: }
! 560: else
! 561: while (!feof(f)) {
! 562: wint_t c = 0;
! 563:
! 564: c = fgetwc(f);
! 565:
! 566: if (c == WEOF) {
! 567: if (*len == 0)
! 568: return NULL;
! 569: goto line_read_done;
! 570: }
! 571: if (c == eols)
! 572: goto line_read_done;
! 573:
! 574: if (*len + 1 >= rb->fgetwln_z_buffer_size) {
! 575: rb->fgetwln_z_buffer_size += 256;
! 576: rb->fgetwln_z_buffer =
! 577: sort_reallocarray(rb->fgetwln_z_buffer,
! 578: rb->fgetwln_z_buffer_size, sizeof(wchar_t));
! 579: }
! 580:
! 581: rb->fgetwln_z_buffer[*len] = c;
! 582: rb->fgetwln_z_buffer[++(*len)] = 0;
! 583: }
! 584:
! 585: line_read_done:
! 586: /* we do not count the last 0 */
! 587: return bwssbdup(rb->fgetwln_z_buffer, *len);
! 588: }
! 589: }
! 590:
! 591: int
! 592: bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
! 593: size_t offset, size_t len)
! 594: {
! 595: size_t cmp_len, len1, len2;
! 596: int res = 0;
! 597:
! 598: len1 = bws1->len;
! 599: len2 = bws2->len;
! 600:
! 601: if (len1 <= offset) {
! 602: return (len2 <= offset) ? 0 : -1;
! 603: } else {
! 604: if (len2 <= offset)
! 605: return 1;
! 606: else {
! 607: len1 -= offset;
! 608: len2 -= offset;
! 609:
! 610: cmp_len = len1;
! 611:
! 612: if (len2 < cmp_len)
! 613: cmp_len = len2;
! 614:
! 615: if (len < cmp_len)
! 616: cmp_len = len;
! 617:
! 618: if (sort_mb_cur_max == 1) {
! 619: const unsigned char *s1, *s2;
! 620:
! 621: s1 = bws1->data.cstr + offset;
! 622: s2 = bws2->data.cstr + offset;
! 623:
! 624: res = memcmp(s1, s2, cmp_len);
! 625:
! 626: } else {
! 627: const wchar_t *s1, *s2;
! 628:
! 629: s1 = bws1->data.wstr + offset;
! 630: s2 = bws2->data.wstr + offset;
! 631:
! 632: res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
! 633: }
! 634: }
! 635: }
! 636:
! 637: if (res == 0) {
! 638: if (len1 < cmp_len && len1 < len2)
! 639: res = -1;
! 640: else if (len2 < cmp_len && len2 < len1)
! 641: res = +1;
! 642: }
! 643:
! 644: return res;
! 645: }
! 646:
! 647: int
! 648: bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
! 649: {
! 650: size_t len1, len2, cmp_len;
! 651: int res;
! 652:
! 653: len1 = bws1->len;
! 654: len2 = bws2->len;
! 655:
! 656: len1 -= offset;
! 657: len2 -= offset;
! 658:
! 659: cmp_len = len1;
! 660:
! 661: if (len2 < cmp_len)
! 662: cmp_len = len2;
! 663:
! 664: res = bwsncmp(bws1, bws2, offset, cmp_len);
! 665:
! 666: if (res == 0) {
! 667: if (len1 < len2)
! 668: res = -1;
! 669: else if (len2 < len1)
! 670: res = +1;
! 671: }
! 672:
! 673: return res;
! 674: }
! 675:
! 676: int
! 677: bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
! 678: {
! 679: wchar_t c1, c2;
! 680: size_t i = 0;
! 681:
! 682: for (i = 0; i < len; ++i) {
! 683: c1 = bws_get_iter_value(iter1);
! 684: c2 = bws_get_iter_value(iter2);
! 685: if (c1 != c2)
! 686: return c1 - c2;
! 687: iter1 = bws_iterator_inc(iter1, 1);
! 688: iter2 = bws_iterator_inc(iter2, 1);
! 689: }
! 690:
! 691: return 0;
! 692: }
! 693:
! 694: int
! 695: bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
! 696: {
! 697: size_t len1, len2;
! 698:
! 699: len1 = bws1->len;
! 700: len2 = bws2->len;
! 701:
! 702: if (len1 <= offset)
! 703: return (len2 <= offset) ? 0 : -1;
! 704: else {
! 705: if (len2 <= offset)
! 706: return 1;
! 707: else {
! 708: len1 -= offset;
! 709: len2 -= offset;
! 710:
! 711: if (sort_mb_cur_max == 1) {
! 712: const unsigned char *s1, *s2;
! 713:
! 714: s1 = bws1->data.cstr + offset;
! 715: s2 = bws2->data.cstr + offset;
! 716:
! 717: if (byte_sort) {
! 718: int res = 0;
! 719:
! 720: if (len1 > len2) {
! 721: res = memcmp(s1, s2, len2);
! 722: if (!res)
! 723: res = +1;
! 724: } else if (len1 < len2) {
! 725: res = memcmp(s1, s2, len1);
! 726: if (!res)
! 727: res = -1;
! 728: } else
! 729: res = memcmp(s1, s2, len1);
! 730:
! 731: return res;
! 732:
! 733: } else {
! 734: int res = 0;
! 735: size_t i, maxlen;
! 736:
! 737: i = 0;
! 738: maxlen = len1;
! 739:
! 740: if (maxlen > len2)
! 741: maxlen = len2;
! 742:
! 743: while (i < maxlen) {
! 744: /* goto next non-zero part: */
! 745: while ((i < maxlen) &&
! 746: !s1[i] && !s2[i])
! 747: ++i;
! 748:
! 749: if (i >= maxlen)
! 750: break;
! 751:
! 752: if (s1[i] == 0) {
! 753: if (s2[i] == 0)
! 754: /* NOTREACHED */
! 755: err(2, "bwscoll error 01");
! 756: else
! 757: return -1;
! 758: } else if (s2[i] == 0)
! 759: return 1;
! 760:
! 761: res = strcoll((const char *)(s1 + i), (const char *)(s2 + i));
! 762: if (res)
! 763: return res;
! 764:
! 765: while ((i < maxlen) &&
! 766: s1[i] && s2[i])
! 767: ++i;
! 768:
! 769: if (i >= maxlen)
! 770: break;
! 771:
! 772: if (s1[i] == 0) {
! 773: if (s2[i] == 0) {
! 774: ++i;
! 775: continue;
! 776: } else
! 777: return -1;
! 778: } else if (s2[i] == 0)
! 779: return 1;
! 780: else
! 781: /* NOTREACHED */
! 782: err(2, "bwscoll error 02");
! 783: }
! 784:
! 785: if (len1 < len2)
! 786: return -1;
! 787: else if (len1 > len2)
! 788: return 1;
! 789:
! 790: return 0;
! 791: }
! 792: } else {
! 793: const wchar_t *s1, *s2;
! 794: size_t i, maxlen;
! 795: int res = 0;
! 796:
! 797: s1 = bws1->data.wstr + offset;
! 798: s2 = bws2->data.wstr + offset;
! 799:
! 800: i = 0;
! 801: maxlen = len1;
! 802:
! 803: if (maxlen > len2)
! 804: maxlen = len2;
! 805:
! 806: while (i < maxlen) {
! 807:
! 808: /* goto next non-zero part: */
! 809: while ((i < maxlen) &&
! 810: !s1[i] && !s2[i])
! 811: ++i;
! 812:
! 813: if (i >= maxlen)
! 814: break;
! 815:
! 816: if (s1[i] == 0) {
! 817: if (s2[i] == 0)
! 818: /* NOTREACHED */
! 819: err(2, "bwscoll error 1");
! 820: else
! 821: return -1;
! 822: } else if (s2[i] == 0)
! 823: return 1;
! 824:
! 825: res = wide_str_coll(s1 + i, s2 + i);
! 826: if (res)
! 827: return res;
! 828:
! 829: while ((i < maxlen) && s1[i] && s2[i])
! 830: ++i;
! 831:
! 832: if (i >= maxlen)
! 833: break;
! 834:
! 835: if (s1[i] == 0) {
! 836: if (s2[i] == 0) {
! 837: ++i;
! 838: continue;
! 839: } else
! 840: return -1;
! 841: } else if (s2[i] == 0)
! 842: return 1;
! 843: else
! 844: /* NOTREACHED */
! 845: err(2, "bwscoll error 2");
! 846: }
! 847:
! 848: if (len1 == len2)
! 849: return 0;
! 850: return len1 < len2 ? -1 : 1;
! 851: }
! 852: }
! 853: }
! 854: }
! 855:
! 856: /*
! 857: * Correction of the system API
! 858: */
! 859: double
! 860: bwstod(struct bwstring *s0, bool *empty)
! 861: {
! 862: double ret = 0;
! 863:
! 864: if (sort_mb_cur_max == 1) {
! 865: char *ep, *end, *s;
! 866:
! 867: s = (char *)s0->data.cstr;
! 868: end = s + s0->len;
! 869: ep = NULL;
! 870:
! 871: while (isblank((unsigned char)*s) && s < end)
! 872: ++s;
! 873:
! 874: if (!isprint((unsigned char)*s)) {
! 875: *empty = true;
! 876: return 0;
! 877: }
! 878:
! 879: ret = strtod(s, &ep);
! 880: if (ep == s) {
! 881: *empty = true;
! 882: return 0;
! 883: }
! 884: } else {
! 885: wchar_t *end, *ep, *s;
! 886:
! 887: s = s0->data.wstr;
! 888: end = s + s0->len;
! 889: ep = NULL;
! 890:
! 891: while (iswblank(*s) && s < end)
! 892: ++s;
! 893:
! 894: if (!iswprint(*s)) {
! 895: *empty = true;
! 896: return 0;
! 897: }
! 898:
! 899: ret = wcstod(s, &ep);
! 900: if (ep == s) {
! 901: *empty = true;
! 902: return 0;
! 903: }
! 904: }
! 905:
! 906: *empty = false;
! 907: return ret;
! 908: }
! 909:
! 910: /*
! 911: * A helper function for monthcoll. If a line matches
! 912: * a month name, it returns (number of the month - 1),
! 913: * while if there is no match, it just return -1.
! 914: */
! 915:
! 916: int
! 917: bws_month_score(const struct bwstring *s0)
! 918: {
! 919:
! 920: if (sort_mb_cur_max == 1) {
! 921: const char *end, *s;
! 922: int i;
! 923:
! 924: s = (char *)s0->data.cstr;
! 925: end = s + s0->len;
! 926:
! 927: while (isblank((unsigned char)*s) && s < end)
! 928: ++s;
! 929:
! 930: for (i = 11; i >= 0; --i) {
! 931: if (cmonths[i] &&
! 932: (s == strstr(s, cmonths[i])))
! 933: return i;
! 934: }
! 935: } else {
! 936: const wchar_t *end, *s;
! 937: int i;
! 938:
! 939: s = s0->data.wstr;
! 940: end = s + s0->len;
! 941:
! 942: while (iswblank(*s) && s < end)
! 943: ++s;
! 944:
! 945: for (i = 11; i >= 0; --i) {
! 946: if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
! 947: return i;
! 948: }
! 949: }
! 950:
! 951: return -1;
! 952: }
! 953:
! 954: /*
! 955: * Rips out leading blanks (-b).
! 956: */
! 957: struct bwstring *
! 958: ignore_leading_blanks(struct bwstring *str)
! 959: {
! 960:
! 961: if (sort_mb_cur_max == 1) {
! 962: unsigned char *dst, *end, *src;
! 963:
! 964: src = str->data.cstr;
! 965: dst = src;
! 966: end = src + str->len;
! 967:
! 968: while (src < end && isblank(*src))
! 969: ++src;
! 970:
! 971: if (src != dst) {
! 972: size_t newlen;
! 973:
! 974: newlen = BWSLEN(str) - (src - dst);
! 975:
! 976: while (src < end) {
! 977: *dst = *src;
! 978: ++dst;
! 979: ++src;
! 980: }
! 981: bws_setlen(str, newlen);
! 982: }
! 983: } else {
! 984: wchar_t *dst, *end, *src;
! 985:
! 986: src = str->data.wstr;
! 987: dst = src;
! 988: end = src + str->len;
! 989:
! 990: while (src < end && iswblank(*src))
! 991: ++src;
! 992:
! 993: if (src != dst) {
! 994:
! 995: size_t newlen = BWSLEN(str) - (src - dst);
! 996:
! 997: while (src < end) {
! 998: *dst = *src;
! 999: ++dst;
! 1000: ++src;
! 1001: }
! 1002: bws_setlen(str, newlen);
! 1003:
! 1004: }
! 1005: }
! 1006: return str;
! 1007: }
! 1008:
! 1009: /*
! 1010: * Rips out nonprinting characters (-i).
! 1011: */
! 1012: struct bwstring *
! 1013: ignore_nonprinting(struct bwstring *str)
! 1014: {
! 1015: size_t newlen = str->len;
! 1016:
! 1017: if (sort_mb_cur_max == 1) {
! 1018: unsigned char *dst, *end, *src;
! 1019: unsigned char c;
! 1020:
! 1021: src = str->data.cstr;
! 1022: dst = src;
! 1023: end = src + str->len;
! 1024:
! 1025: while (src < end) {
! 1026: c = *src;
! 1027: if (isprint(c)) {
! 1028: *dst = c;
! 1029: ++dst;
! 1030: ++src;
! 1031: } else {
! 1032: ++src;
! 1033: --newlen;
! 1034: }
! 1035: }
! 1036: } else {
! 1037: wchar_t *dst, *end, *src;
! 1038: wchar_t c;
! 1039:
! 1040: src = str->data.wstr;
! 1041: dst = src;
! 1042: end = src + str->len;
! 1043:
! 1044: while (src < end) {
! 1045: c = *src;
! 1046: if (iswprint(c)) {
! 1047: *dst = c;
! 1048: ++dst;
! 1049: ++src;
! 1050: } else {
! 1051: ++src;
! 1052: --newlen;
! 1053: }
! 1054: }
! 1055: }
! 1056: bws_setlen(str, newlen);
! 1057:
! 1058: return str;
! 1059: }
! 1060:
! 1061: /*
! 1062: * Rips out any characters that are not alphanumeric characters
! 1063: * nor blanks (-d).
! 1064: */
! 1065: struct bwstring *
! 1066: dictionary_order(struct bwstring *str)
! 1067: {
! 1068: size_t newlen = str->len;
! 1069:
! 1070: if (sort_mb_cur_max == 1) {
! 1071: unsigned char *dst, *end, *src;
! 1072: unsigned char c;
! 1073:
! 1074: src = str->data.cstr;
! 1075: dst = src;
! 1076: end = src + str->len;
! 1077:
! 1078: while (src < end) {
! 1079: c = *src;
! 1080: if (isalnum(c) || isblank(c)) {
! 1081: *dst = c;
! 1082: ++dst;
! 1083: ++src;
! 1084: } else {
! 1085: ++src;
! 1086: --newlen;
! 1087: }
! 1088: }
! 1089: } else {
! 1090: wchar_t *dst, *end, *src;
! 1091: wchar_t c;
! 1092:
! 1093: src = str->data.wstr;
! 1094: dst = src;
! 1095: end = src + str->len;
! 1096:
! 1097: while (src < end) {
! 1098: c = *src;
! 1099: if (iswalnum(c) || iswblank(c)) {
! 1100: *dst = c;
! 1101: ++dst;
! 1102: ++src;
! 1103: } else {
! 1104: ++src;
! 1105: --newlen;
! 1106: }
! 1107: }
! 1108: }
! 1109: bws_setlen(str, newlen);
! 1110:
! 1111: return str;
! 1112: }
! 1113:
! 1114: /*
! 1115: * Converts string to lower case(-f).
! 1116: */
! 1117: struct bwstring *
! 1118: ignore_case(struct bwstring *str)
! 1119: {
! 1120:
! 1121: if (sort_mb_cur_max == 1) {
! 1122: unsigned char *end, *s;
! 1123:
! 1124: s = str->data.cstr;
! 1125: end = s + str->len;
! 1126:
! 1127: while (s < end) {
! 1128: *s = toupper(*s);
! 1129: ++s;
! 1130: }
! 1131: } else {
! 1132: wchar_t *end, *s;
! 1133:
! 1134: s = str->data.wstr;
! 1135: end = s + str->len;
! 1136:
! 1137: while (s < end) {
! 1138: *s = towupper(*s);
! 1139: ++s;
! 1140: }
! 1141: }
! 1142: return str;
! 1143: }
! 1144:
! 1145: void
! 1146: bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
! 1147: {
! 1148:
! 1149: if (sort_mb_cur_max == 1)
! 1150: warnx("%s:%zu: disorder: %s", fn, pos + 1, s->data.cstr);
! 1151: else
! 1152: warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->data.wstr);
! 1153: }