Annotation of src/usr.bin/window/compress.c, Revision 1.1
1.1 ! deraadt 1: /* $NetBSD: compress.c,v 1.3 1995/09/28 10:34:13 tls Exp $ */
! 2:
! 3: /*
! 4: * Copyright (c) 1989, 1993
! 5: * The Regents of the University of California. All rights reserved.
! 6: *
! 7: * This code is derived from software contributed to Berkeley by
! 8: * Edward Wang at The University of California, Berkeley.
! 9: *
! 10: * Redistribution and use in source and binary forms, with or without
! 11: * modification, are permitted provided that the following conditions
! 12: * are met:
! 13: * 1. Redistributions of source code must retain the above copyright
! 14: * notice, this list of conditions and the following disclaimer.
! 15: * 2. Redistributions in binary form must reproduce the above copyright
! 16: * notice, this list of conditions and the following disclaimer in the
! 17: * documentation and/or other materials provided with the distribution.
! 18: * 3. All advertising materials mentioning features or use of this software
! 19: * must display the following acknowledgement:
! 20: * This product includes software developed by the University of
! 21: * California, Berkeley and its contributors.
! 22: * 4. Neither the name of the University nor the names of its contributors
! 23: * may be used to endorse or promote products derived from this software
! 24: * without specific prior written permission.
! 25: *
! 26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 36: * SUCH DAMAGE.
! 37: */
! 38:
! 39: #ifndef lint
! 40: #if 0
! 41: static char sccsid[] = "@(#)compress.c 8.1 (Berkeley) 6/6/93";
! 42: #else
! 43: static char rcsid[] = "$NetBSD: compress.c,v 1.3 1995/09/28 10:34:13 tls Exp $";
! 44: #endif
! 45: #endif /* not lint */
! 46:
! 47: #include "ww.h"
! 48: #include "tt.h"
! 49:
! 50: /* special */
! 51: #include <stdio.h>
! 52: #include <fcntl.h>
! 53: int cc_trace = 0;
! 54: FILE *cc_trace_fp;
! 55:
! 56: /* tunable parameters */
! 57:
! 58: int cc_reverse = 1;
! 59: int cc_sort = 0;
! 60: int cc_chop = 0;
! 61:
! 62: int cc_token_max = 8; /* <= TOKEN_MAX */
! 63: int cc_token_min = 2; /* > tt.tt_put_token_cost */
! 64: int cc_npass0 = 1;
! 65: int cc_npass1 = 1;
! 66:
! 67: int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */
! 68:
! 69: int cc_ntoken = 8192;
! 70:
! 71: #define cc_weight XXX
! 72: #ifndef cc_weight
! 73: int cc_weight = 0;
! 74: #endif
! 75:
! 76: #define TOKEN_MAX 16
! 77:
! 78: struct cc {
! 79: char string[TOKEN_MAX];
! 80: char length;
! 81: char flag;
! 82: #ifndef cc_weight
! 83: short weight;
! 84: #endif
! 85: long time; /* time last seen */
! 86: short bcount; /* count in this buffer */
! 87: short ccount; /* count in compression */
! 88: short places; /* places in the buffer */
! 89: short code; /* token code */
! 90: struct cc *qforw, *qback;
! 91: struct cc *hforw, **hback;
! 92: };
! 93:
! 94: short cc_thresholds[TOKEN_MAX + 1];
! 95: #define thresh(length) (cc_thresholds[length])
! 96: #define threshp(code, count, length) \
! 97: ((code) >= 0 || (short) (count) >= cc_thresholds[length])
! 98:
! 99: #ifndef cc_weight
! 100: short cc_wthresholds[TOKEN_MAX + 1];
! 101: #define wthresh(length) (cc_wthresholds[length])
! 102: #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
! 103: #else
! 104: #define wthreshp(weight, length) (0)
! 105: #endif
! 106:
! 107: #ifndef cc_weight
! 108: short cc_wlimits[TOKEN_MAX + 1];
! 109: #define wlimit(length) (cc_wlimits[length])
! 110: #endif
! 111:
! 112: #define put_token_score(length) ((length) - tt.tt_put_token_cost)
! 113:
! 114: int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
! 115: #define score_adjust(score, p) \
! 116: do { \
! 117: int length = (p)->length; \
! 118: int ccount = (p)->ccount; \
! 119: if (threshp((p)->code, ccount, length) || \
! 120: wthreshp((p)->weight, length)) /* XXX */ \
! 121: (score) -= length - tt.tt_put_token_cost; \
! 122: else \
! 123: (score) += cc_score_adjustments[length][ccount]; \
! 124: } while (0)
! 125:
! 126: int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
! 127:
! 128: struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
! 129:
! 130: #define qinsert(p1, p2) \
! 131: do { \
! 132: register struct cc *forw = (p1)->qforw; \
! 133: register struct cc *back = (p1)->qback; \
! 134: back->qforw = forw; \
! 135: forw->qback = back; \
! 136: forw = (p2)->qforw; \
! 137: (p1)->qforw = forw; \
! 138: forw->qback = (p1); \
! 139: (p2)->qforw = (p1); \
! 140: (p1)->qback = (p2); \
! 141: } while (0)
! 142:
! 143: #define qinsertq(q, p) \
! 144: ((q)->qforw == (q) ? 0 : \
! 145: ((q)->qback->qforw = (p)->qforw, \
! 146: (p)->qforw->qback = (q)->qback, \
! 147: (q)->qforw->qback = (p), \
! 148: (p)->qforw = (q)->qforw, \
! 149: (q)->qforw = (q), \
! 150: (q)->qback = (q)))
! 151:
! 152: #define H (14)
! 153: #define HSIZE (1 << H)
! 154: #define hash(h, c) ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
! 155:
! 156: char *cc_buffer;
! 157: struct cc **cc_output; /* the output array */
! 158: short *cc_places[TOKEN_MAX + 1];
! 159: short *cc_hashcodes; /* for computing hashcodes */
! 160: struct cc **cc_htab; /* the hash table */
! 161: struct cc **cc_tokens; /* holds all the active tokens */
! 162: struct cc_undo {
! 163: struct cc **pos;
! 164: struct cc *val;
! 165: } *cc_undo;
! 166:
! 167: long cc_time, cc_time0;
! 168:
! 169: char *cc_tt_ob, *cc_tt_obe;
! 170:
! 171: ccinit()
! 172: {
! 173: register i, j;
! 174: register struct cc *p;
! 175:
! 176: if (tt.tt_token_max > cc_token_max)
! 177: tt.tt_token_max = cc_token_max;
! 178: if (tt.tt_token_min < cc_token_min)
! 179: tt.tt_token_min = cc_token_min;
! 180: if (tt.tt_token_min > tt.tt_token_max) {
! 181: tt.tt_ntoken = 0;
! 182: return 0;
! 183: }
! 184: if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */
! 185: tt.tt_ntoken = cc_ntoken / 2;
! 186: #define C(x) (sizeof (x) / sizeof *(x))
! 187: for (i = 0; i < C(cc_thresholds); i++) {
! 188: int h = i - tt.tt_put_token_cost;
! 189: if (h > 0)
! 190: cc_thresholds[i] =
! 191: (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
! 192: else
! 193: cc_thresholds[i] = 0;
! 194: }
! 195: for (i = 0; i < C(cc_score_adjustments); i++) {
! 196: int t = cc_thresholds[i];
! 197: for (j = 0; j < C(*cc_score_adjustments); j++) {
! 198: if (j >= t)
! 199: cc_score_adjustments[i][j] =
! 200: - (i - tt.tt_put_token_cost);
! 201: else if (j < t - 1)
! 202: cc_score_adjustments[i][j] = 0;
! 203: else
! 204: /*
! 205: * cost now is
! 206: * length * (ccount + 1) a
! 207: * cost before was
! 208: * set-token-cost + length +
! 209: * ccount * put-token-cost b
! 210: * the score adjustment is (b - a)
! 211: */
! 212: cc_score_adjustments[i][j] =
! 213: tt.tt_set_token_cost + i +
! 214: j * tt.tt_put_token_cost -
! 215: i * (j + 1);
! 216: if (j >= t)
! 217: cc_initial_scores[i][j] = 0;
! 218: else
! 219: /*
! 220: * - (set-token-cost +
! 221: * (length - put-token-cost) -
! 222: * (length - put-token-cost) * ccount)
! 223: */
! 224: cc_initial_scores[i][j] =
! 225: - (tt.tt_set_token_cost +
! 226: (i - tt.tt_put_token_cost) -
! 227: (i - tt.tt_put_token_cost) * j);
! 228: }
! 229: }
! 230: #ifndef cc_weight
! 231: for (i = 1; i < C(cc_wthresholds); i++) {
! 232: cc_wthresholds[i] =
! 233: ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
! 234: i / 5 + 1) *
! 235: cc_weight + 1;
! 236: cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
! 237: }
! 238: #endif
! 239: #undef C
! 240: if ((cc_output = (struct cc **)
! 241: malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
! 242: goto nomem;
! 243: if ((cc_hashcodes = (short *)
! 244: malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
! 245: goto nomem;
! 246: if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
! 247: goto nomem;
! 248: if ((cc_tokens = (struct cc **)
! 249: malloc((unsigned)
! 250: (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
! 251: sizeof *cc_tokens)) == 0)
! 252: goto nomem;
! 253: if ((cc_undo = (struct cc_undo *)
! 254: malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
! 255: goto nomem;
! 256: for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
! 257: if ((cc_places[i] = (short *)
! 258: malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
! 259: goto nomem;
! 260: cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
! 261: cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
! 262: cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
! 263: cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
! 264: if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
! 265: goto nomem;
! 266: for (i = 0; i < tt.tt_ntoken; i++) {
! 267: p->code = i;
! 268: p->time = -1;
! 269: p->qback = cc_q0a.qback;
! 270: p->qforw = &cc_q0a;
! 271: p->qback->qforw = p;
! 272: cc_q0a.qback = p;
! 273: p++;
! 274: }
! 275: for (; i < cc_ntoken; i++) {
! 276: p->code = -1;
! 277: p->time = -1;
! 278: p->qback = cc_q1a.qback;
! 279: p->qforw = &cc_q1a;
! 280: p->qback->qforw = p;
! 281: cc_q1a.qback = p;
! 282: p++;
! 283: }
! 284: cc_tt_ob = tt_ob;
! 285: cc_tt_obe = tt_obe;
! 286: if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
! 287: goto nomem;
! 288: return 0;
! 289: nomem:
! 290: wwerrno = WWE_NOMEM;
! 291: return -1;
! 292: }
! 293:
! 294: ccstart()
! 295: {
! 296: int ccflush();
! 297:
! 298: ttflush();
! 299: tt_obp = tt_ob = cc_buffer;
! 300: tt_obe = tt_ob + cc_bufsize;
! 301: tt.tt_flush = ccflush;
! 302: if (cc_trace) {
! 303: cc_trace_fp = fopen("window-trace", "a");
! 304: (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
! 305: }
! 306: ccreset();
! 307: }
! 308:
! 309: ccreset()
! 310: {
! 311: register struct cc *p;
! 312:
! 313: bzero((char *) cc_htab, HSIZE * sizeof *cc_htab);
! 314: for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
! 315: p->hback = 0;
! 316: for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
! 317: p->hback = 0;
! 318: }
! 319:
! 320: ccend()
! 321: {
! 322:
! 323: ttflush();
! 324: tt_obp = tt_ob = cc_tt_ob;
! 325: tt_obe = cc_tt_obe;
! 326: tt.tt_flush = 0;
! 327: if (cc_trace_fp != NULL) {
! 328: (void) fclose(cc_trace_fp);
! 329: cc_trace_fp = NULL;
! 330: }
! 331: }
! 332:
! 333: ccflush()
! 334: {
! 335: int bufsize = tt_obp - tt_ob;
! 336: int n;
! 337:
! 338: if (tt_ob != cc_buffer)
! 339: abort();
! 340: if (cc_trace_fp != NULL) {
! 341: (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
! 342: (void) putc(-1, cc_trace_fp);
! 343: }
! 344: tt.tt_flush = 0;
! 345: (*tt.tt_compress)(1);
! 346: if (bufsize < tt.tt_token_min) {
! 347: ttflush();
! 348: goto out;
! 349: }
! 350: tt_obp = tt_ob = cc_tt_ob;
! 351: tt_obe = cc_tt_obe;
! 352: cc_time0 = cc_time;
! 353: cc_time += bufsize;
! 354: n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
! 355: cc_compress_phase(cc_output, bufsize, cc_tokens, n);
! 356: cc_output_phase(cc_buffer, cc_output, bufsize);
! 357: ttflush();
! 358: tt_obp = tt_ob = cc_buffer;
! 359: tt_obe = cc_buffer + cc_bufsize;
! 360: out:
! 361: (*tt.tt_compress)(0);
! 362: tt.tt_flush = ccflush;
! 363: }
! 364:
! 365: cc_sweep_phase(buffer, bufsize, tokens)
! 366: char *buffer;
! 367: struct cc **tokens;
! 368: {
! 369: register struct cc **pp = tokens;
! 370: register i, n;
! 371: #ifdef STATS
! 372: int nn, ii;
! 373: #endif
! 374:
! 375: #ifdef STATS
! 376: if (verbose >= 0)
! 377: time_begin();
! 378: if (verbose > 0)
! 379: printf("Sweep:");
! 380: #endif
! 381: cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
! 382: #ifdef STATS
! 383: ntoken_stat = 0;
! 384: nn = 0;
! 385: ii = 0;
! 386: #endif
! 387: for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
! 388: #ifdef STATS
! 389: if (verbose > 0) {
! 390: if (ii > 7) {
! 391: printf("\n ");
! 392: ii = 0;
! 393: }
! 394: ii++;
! 395: printf(" (%d", i);
! 396: (void) fflush(stdout);
! 397: }
! 398: #endif
! 399: n = cc_sweep(buffer, bufsize, pp, i);
! 400: pp += n;
! 401: #ifdef STATS
! 402: if (verbose > 0) {
! 403: if (--n > 0) {
! 404: printf(" %d", n);
! 405: nn += n;
! 406: }
! 407: putchar(')');
! 408: }
! 409: #endif
! 410: }
! 411: qinsertq(&cc_q1b, &cc_q1a);
! 412: #ifdef STATS
! 413: if (verbose > 0)
! 414: printf("\n %d tokens, %d candidates\n",
! 415: ntoken_stat, nn);
! 416: if (verbose >= 0)
! 417: time_end();
! 418: #endif
! 419: return pp - tokens;
! 420: }
! 421:
! 422: cc_sweep0(buffer, n, length)
! 423: char *buffer;
! 424: {
! 425: register char *p;
! 426: register short *hc;
! 427: register i;
! 428: register short c;
! 429: register short pc = tt.tt_padc;
! 430:
! 431: /* n and length are at least 1 */
! 432: p = buffer++;
! 433: hc = cc_hashcodes;
! 434: i = n;
! 435: do {
! 436: if ((*hc++ = *p++) == pc)
! 437: hc[-1] = -1;
! 438: } while (--i);
! 439: while (--length) {
! 440: p = buffer++;
! 441: hc = cc_hashcodes;
! 442: for (i = n--; --i;) {
! 443: if ((c = *p++) == pc || *hc < 0)
! 444: c = -1;
! 445: else
! 446: c = hash(*hc, c);
! 447: *hc++ = c;
! 448: }
! 449: }
! 450: }
! 451:
! 452: cc_sweep(buffer, bufsize, tokens, length)
! 453: char *buffer;
! 454: struct cc **tokens;
! 455: register length;
! 456: {
! 457: register struct cc *p;
! 458: register char *cp;
! 459: register i;
! 460: short *hc;
! 461: short *places = cc_places[length];
! 462: struct cc **pp = tokens;
! 463: short threshold = thresh(length);
! 464: #ifndef cc_weight
! 465: short wthreshold = wthresh(length);
! 466: short limit = wlimit(length);
! 467: #endif
! 468: int time;
! 469: short pc = tt.tt_padc;
! 470:
! 471: i = length - 1;
! 472: bufsize -= i;
! 473: cp = buffer + i;
! 474: hc = cc_hashcodes;
! 475: time = cc_time0;
! 476: for (i = 0; i < bufsize; i++, time++) {
! 477: struct cc **h;
! 478:
! 479: {
! 480: register short *hc1 = hc;
! 481: register short c = *cp++;
! 482: register short hh;
! 483: if ((hh = *hc1) < 0 || c == pc) {
! 484: *hc1++ = -1;
! 485: hc = hc1;
! 486: continue;
! 487: }
! 488: h = cc_htab + (*hc1++ = hash(hh, c));
! 489: hc = hc1;
! 490: }
! 491: for (p = *h; p != 0; p = p->hforw)
! 492: if (p->length == (char) length) {
! 493: register char *p1 = p->string;
! 494: register char *p2 = cp - length;
! 495: register n = length;
! 496: do
! 497: if (*p1++ != *p2++)
! 498: goto fail;
! 499: while (--n);
! 500: break;
! 501: fail:;
! 502: }
! 503: if (p == 0) {
! 504: p = cc_q1a.qback;
! 505: if (p == &cc_q1a ||
! 506: p->time >= cc_time0 && p->length == (char) length)
! 507: continue;
! 508: if (p->hback != 0)
! 509: if ((*p->hback = p->hforw) != 0)
! 510: p->hforw->hback = p->hback;
! 511: {
! 512: register char *p1 = p->string;
! 513: register char *p2 = cp - length;
! 514: register n = length;
! 515: do
! 516: *p1++ = *p2++;
! 517: while (--n);
! 518: }
! 519: p->length = length;
! 520: #ifndef cc_weight
! 521: p->weight = cc_weight;
! 522: #endif
! 523: p->time = time;
! 524: p->bcount = 1;
! 525: p->ccount = 0;
! 526: p->flag = 0;
! 527: if ((p->hforw = *h) != 0)
! 528: p->hforw->hback = &p->hforw;
! 529: *h = p;
! 530: p->hback = h;
! 531: qinsert(p, &cc_q1a);
! 532: places[i] = -1;
! 533: p->places = i;
! 534: #ifdef STATS
! 535: ntoken_stat++;
! 536: #endif
! 537: } else if (p->time < cc_time0) {
! 538: #ifndef cc_weight
! 539: if ((p->weight += p->time - time) < 0)
! 540: p->weight = cc_weight;
! 541: else if ((p->weight += cc_weight) > limit)
! 542: p->weight = limit;
! 543: #endif
! 544: p->time = time;
! 545: p->bcount = 1;
! 546: p->ccount = 0;
! 547: if (p->code >= 0) {
! 548: p->flag = 1;
! 549: *pp++ = p;
! 550: } else
! 551: #ifndef cc_weight
! 552: if (p->weight >= wthreshold) {
! 553: p->flag = 1;
! 554: *pp++ = p;
! 555: qinsert(p, &cc_q1b);
! 556: } else
! 557: #endif
! 558: {
! 559: p->flag = 0;
! 560: qinsert(p, &cc_q1a);
! 561: }
! 562: places[i] = -1;
! 563: p->places = i;
! 564: #ifdef STATS
! 565: ntoken_stat++;
! 566: #endif
! 567: } else if (p->time + length > time) {
! 568: /*
! 569: * overlapping token, don't count as two and
! 570: * don't update time, but do adjust weight to offset
! 571: * the difference
! 572: */
! 573: #ifndef cc_weight
! 574: if (cc_weight != 0) { /* XXX */
! 575: p->weight += time - p->time;
! 576: if (!p->flag && p->weight >= wthreshold) {
! 577: p->flag = 1;
! 578: *pp++ = p;
! 579: qinsert(p, &cc_q1b);
! 580: }
! 581: }
! 582: #endif
! 583: places[i] = p->places;
! 584: p->places = i;
! 585: } else {
! 586: #ifndef cc_weight
! 587: if ((p->weight += p->time - time) < 0)
! 588: p->weight = cc_weight;
! 589: else if ((p->weight += cc_weight) > limit)
! 590: p->weight = limit;
! 591: #endif
! 592: p->time = time;
! 593: p->bcount++;
! 594: if (!p->flag &&
! 595: /* code must be < 0 if flag false here */
! 596: (p->bcount >= threshold
! 597: #ifndef cc_weight
! 598: || p->weight >= wthreshold
! 599: #endif
! 600: )) {
! 601: p->flag = 1;
! 602: *pp++ = p;
! 603: qinsert(p, &cc_q1b);
! 604: }
! 605: places[i] = p->places;
! 606: p->places = i;
! 607: }
! 608: }
! 609: if ((i = pp - tokens) > 0) {
! 610: *pp = 0;
! 611: if (cc_reverse)
! 612: cc_sweep_reverse(tokens, places);
! 613: if (cc_sort && i > 1) {
! 614: int cc_token_compare();
! 615: qsort((char *) tokens, i, sizeof *tokens,
! 616: cc_token_compare);
! 617: }
! 618: if (cc_chop) {
! 619: if ((i = i * cc_chop / 100) == 0)
! 620: i = 1;
! 621: tokens[i] = 0;
! 622: }
! 623: i++;
! 624: }
! 625: return i;
! 626: }
! 627:
! 628: cc_sweep_reverse(pp, places)
! 629: register struct cc **pp;
! 630: register short *places;
! 631: {
! 632: register struct cc *p;
! 633: register short front, back, t;
! 634:
! 635: while ((p = *pp++) != 0) {
! 636: back = -1;
! 637: t = p->places;
! 638: /* the list is never empty */
! 639: do {
! 640: front = places[t];
! 641: places[t] = back;
! 642: back = t;
! 643: } while ((t = front) >= 0);
! 644: p->places = back;
! 645: }
! 646: }
! 647:
! 648: cc_compress_phase(output, bufsize, tokens, ntoken)
! 649: struct cc **output;
! 650: struct cc **tokens;
! 651: {
! 652: register i;
! 653:
! 654: bzero((char *) output, bufsize * sizeof *output);
! 655: for (i = 0; i < cc_npass0; i++)
! 656: cc_compress_phase1(output, tokens, ntoken, 0);
! 657: for (i = 0; i < cc_npass1; i++)
! 658: cc_compress_phase1(output, tokens, ntoken, 1);
! 659: cc_compress_cleanup(output, bufsize);
! 660: }
! 661:
! 662: cc_compress_phase1(output, tokens, ntoken, flag)
! 663: register struct cc **output;
! 664: struct cc **tokens;
! 665: {
! 666: register struct cc **pp;
! 667: #ifdef STATS
! 668: register int i = 0;
! 669: int nt = 0, cc = 0, nc = 0;
! 670: #endif
! 671:
! 672: #ifdef STATS
! 673: if (verbose >= 0)
! 674: time_begin();
! 675: if (verbose > 0)
! 676: printf("Compress:");
! 677: #endif
! 678: pp = tokens;
! 679: while (pp < tokens + ntoken) {
! 680: #ifdef STATS
! 681: if (verbose > 0) {
! 682: ntoken_stat = 0;
! 683: ccount_stat = 0;
! 684: ncover_stat = 0;
! 685: if (i > 2) {
! 686: printf("\n ");
! 687: i = 0;
! 688: }
! 689: i++;
! 690: printf(" (%d", (*pp)->length);
! 691: (void) fflush(stdout);
! 692: }
! 693: #endif
! 694: pp += cc_compress(output, pp, flag);
! 695: #ifdef STATS
! 696: if (verbose > 0) {
! 697: printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
! 698: ncover_stat);
! 699: nt += ntoken_stat;
! 700: cc += ccount_stat;
! 701: nc += ncover_stat;
! 702: }
! 703: #endif
! 704: }
! 705: #ifdef STATS
! 706: if (verbose > 0)
! 707: printf("\n total: (%dt %du %dc)\n", nt, cc, nc);
! 708: if (verbose >= 0)
! 709: time_end();
! 710: #endif
! 711: }
! 712:
! 713: cc_compress_cleanup(output, bufsize)
! 714: register struct cc **output;
! 715: {
! 716: register struct cc **end;
! 717:
! 718: /* the previous output phase may have been interrupted */
! 719: qinsertq(&cc_q0b, &cc_q0a);
! 720: for (end = output + bufsize; output < end;) {
! 721: register struct cc *p;
! 722: register length;
! 723: if ((p = *output) == 0) {
! 724: output++;
! 725: continue;
! 726: }
! 727: length = p->length;
! 728: if (!p->flag) {
! 729: } else if (p->code >= 0) {
! 730: qinsert(p, &cc_q0b);
! 731: p->flag = 0;
! 732: } else if (p->ccount == 0) {
! 733: *output = 0;
! 734: } else if (p->ccount >= thresh(length)
! 735: #ifndef cc_weight
! 736: || wthreshp(p->weight, length)
! 737: #endif
! 738: ) {
! 739: p->flag = 0;
! 740: } else {
! 741: p->ccount = 0;
! 742: *output = 0;
! 743: }
! 744: output += length;
! 745: }
! 746: }
! 747:
! 748: cc_compress(output, tokens, flag)
! 749: struct cc **output;
! 750: struct cc **tokens;
! 751: char flag;
! 752: {
! 753: struct cc **pp = tokens;
! 754: register struct cc *p = *pp++;
! 755: int length = p->length;
! 756: int threshold = thresh(length);
! 757: #ifndef cc_weight
! 758: short wthreshold = wthresh(length);
! 759: #endif
! 760: short *places = cc_places[length];
! 761: int *initial_scores = cc_initial_scores[length];
! 762: int initial_score0 = put_token_score(length);
! 763:
! 764: do {
! 765: int score;
! 766: register struct cc_undo *undop;
! 767: int ccount;
! 768: #ifdef STATS
! 769: int ncover;
! 770: #endif
! 771: int i;
! 772:
! 773: ccount = p->ccount;
! 774: if ((short) ccount >= p->bcount)
! 775: continue;
! 776: if (p->code >= 0 || ccount >= threshold)
! 777: score = 0;
! 778: #ifndef cc_weight
! 779: else if (p->weight >= wthreshold)
! 780: /* allow one fewer match than normal */
! 781: /* XXX, should adjust for ccount */
! 782: score = - tt.tt_set_token_cost;
! 783: #endif
! 784: else
! 785: score = initial_scores[ccount];
! 786: undop = cc_undo;
! 787: #ifdef STATS
! 788: ncover = 0;
! 789: #endif
! 790: for (i = p->places; i >= 0; i = places[i]) {
! 791: register struct cc **jp;
! 792: register struct cc *x;
! 793: register struct cc **ip = output + i;
! 794: register score0 = initial_score0;
! 795: struct cc **iip = ip + length;
! 796: struct cc_undo *undop1 = undop;
! 797:
! 798: if ((x = *(jp = ip)) != 0)
! 799: goto z;
! 800: while (--jp >= output)
! 801: if ((x = *jp) != 0) {
! 802: if (jp + x->length > ip)
! 803: goto z;
! 804: break;
! 805: }
! 806: jp = ip + 1;
! 807: while (jp < iip) {
! 808: if ((x = *jp) == 0) {
! 809: jp++;
! 810: continue;
! 811: }
! 812: z:
! 813: if (x == p)
! 814: goto undo;
! 815: #ifdef STATS
! 816: ncover++;
! 817: #endif
! 818: undop->pos = jp;
! 819: undop->val = x;
! 820: undop++;
! 821: *jp = 0;
! 822: x->ccount--;
! 823: score_adjust(score0, x);
! 824: if (score0 < 0 && flag)
! 825: goto undo;
! 826: jp += x->length;
! 827: }
! 828: undop->pos = ip;
! 829: undop->val = 0;
! 830: undop++;
! 831: *ip = p;
! 832: ccount++;
! 833: score += score0;
! 834: continue;
! 835: undo:
! 836: while (--undop >= undop1)
! 837: if (*undop->pos = x = undop->val)
! 838: x->ccount++;
! 839: undop++;
! 840: }
! 841: if (score > 0) {
! 842: #ifdef STATS
! 843: ccount_stat += ccount - p->ccount;
! 844: ntoken_stat++;
! 845: ncover_stat += ncover;
! 846: #endif
! 847: p->ccount = ccount;
! 848: } else {
! 849: register struct cc_undo *u = cc_undo;
! 850: while (--undop >= u) {
! 851: register struct cc *x;
! 852: if (*undop->pos = x = undop->val)
! 853: x->ccount++;
! 854: }
! 855: }
! 856: } while ((p = *pp++) != 0);
! 857: return pp - tokens;
! 858: }
! 859:
! 860: cc_output_phase(buffer, output, bufsize)
! 861: register char *buffer;
! 862: register struct cc **output;
! 863: register bufsize;
! 864: {
! 865: register i;
! 866: register struct cc *p, *p1;
! 867:
! 868: for (i = 0; i < bufsize;) {
! 869: if ((p = output[i]) == 0) {
! 870: ttputc(buffer[i]);
! 871: i++;
! 872: } else if (p->code >= 0) {
! 873: if (--p->ccount == 0)
! 874: qinsert(p, &cc_q0a);
! 875: (*tt.tt_put_token)(p->code, p->string, p->length);
! 876: wwntokuse++;
! 877: wwntoksave += put_token_score(p->length);
! 878: i += p->length;
! 879: } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
! 880: p->code = p1->code;
! 881: p1->code = -1;
! 882: qinsert(p1, &cc_q1a);
! 883: if (--p->ccount == 0)
! 884: qinsert(p, &cc_q0a);
! 885: else
! 886: qinsert(p, &cc_q0b);
! 887: (*tt.tt_set_token)(p->code, p->string, p->length);
! 888: wwntokdef++;
! 889: wwntoksave -= tt.tt_set_token_cost;
! 890: i += p->length;
! 891: } else {
! 892: p->ccount--;
! 893: ttwrite(p->string, p->length);
! 894: wwntokbad++;
! 895: i += p->length;
! 896: }
! 897: }
! 898: wwntokc += bufsize;
! 899: }
! 900:
! 901: cc_token_compare(p1, p2)
! 902: struct cc **p1, **p2;
! 903: {
! 904: return (*p2)->bcount - (*p1)->bcount;
! 905: }