Annotation of src/usr.bin/window/compress.c, Revision 1.5
1.5 ! mpech 1: /* $OpenBSD: compress.c,v 1.4 1998/04/26 22:49:04 millert Exp $ */
1.1 deraadt 2: /* $NetBSD: compress.c,v 1.3 1995/09/28 10:34:13 tls Exp $ */
3:
4: /*
5: * Copyright (c) 1989, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * Edward Wang at The University of California, Berkeley.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the University of
22: * California, Berkeley and its contributors.
23: * 4. Neither the name of the University nor the names of its contributors
24: * may be used to endorse or promote products derived from this software
25: * without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37: * SUCH DAMAGE.
38: */
39:
40: #ifndef lint
41: #if 0
42: static char sccsid[] = "@(#)compress.c 8.1 (Berkeley) 6/6/93";
43: #else
1.5 ! mpech 44: static char rcsid[] = "$OpenBSD: compress.c,v 1.4 1998/04/26 22:49:04 millert Exp $";
1.1 deraadt 45: #endif
46: #endif /* not lint */
1.3 downsj 47:
48: #include <stdlib.h>
1.1 deraadt 49:
50: #include "ww.h"
51: #include "tt.h"
52:
53: /* special */
54: #include <stdio.h>
55: #include <fcntl.h>
56: int cc_trace = 0;
57: FILE *cc_trace_fp;
58:
59: /* tunable parameters */
60:
61: int cc_reverse = 1;
62: int cc_sort = 0;
63: int cc_chop = 0;
64:
65: int cc_token_max = 8; /* <= TOKEN_MAX */
66: int cc_token_min = 2; /* > tt.tt_put_token_cost */
67: int cc_npass0 = 1;
68: int cc_npass1 = 1;
69:
70: int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */
71:
72: int cc_ntoken = 8192;
73:
74: #define cc_weight XXX
75: #ifndef cc_weight
76: int cc_weight = 0;
77: #endif
78:
79: #define TOKEN_MAX 16
80:
81: struct cc {
82: char string[TOKEN_MAX];
83: char length;
84: char flag;
85: #ifndef cc_weight
86: short weight;
87: #endif
88: long time; /* time last seen */
89: short bcount; /* count in this buffer */
90: short ccount; /* count in compression */
91: short places; /* places in the buffer */
92: short code; /* token code */
93: struct cc *qforw, *qback;
94: struct cc *hforw, **hback;
95: };
96:
97: short cc_thresholds[TOKEN_MAX + 1];
98: #define thresh(length) (cc_thresholds[length])
99: #define threshp(code, count, length) \
100: ((code) >= 0 || (short) (count) >= cc_thresholds[length])
101:
102: #ifndef cc_weight
103: short cc_wthresholds[TOKEN_MAX + 1];
104: #define wthresh(length) (cc_wthresholds[length])
105: #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
106: #else
107: #define wthreshp(weight, length) (0)
108: #endif
109:
110: #ifndef cc_weight
111: short cc_wlimits[TOKEN_MAX + 1];
112: #define wlimit(length) (cc_wlimits[length])
113: #endif
114:
115: #define put_token_score(length) ((length) - tt.tt_put_token_cost)
116:
117: int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
118: #define score_adjust(score, p) \
119: do { \
120: int length = (p)->length; \
121: int ccount = (p)->ccount; \
122: if (threshp((p)->code, ccount, length) || \
123: wthreshp((p)->weight, length)) /* XXX */ \
124: (score) -= length - tt.tt_put_token_cost; \
125: else \
126: (score) += cc_score_adjustments[length][ccount]; \
127: } while (0)
128:
129: int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
130:
131: struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
132:
133: #define qinsert(p1, p2) \
134: do { \
1.5 ! mpech 135: struct cc *forw = (p1)->qforw; \
! 136: struct cc *back = (p1)->qback; \
1.1 deraadt 137: back->qforw = forw; \
138: forw->qback = back; \
139: forw = (p2)->qforw; \
140: (p1)->qforw = forw; \
141: forw->qback = (p1); \
142: (p2)->qforw = (p1); \
143: (p1)->qback = (p2); \
144: } while (0)
145:
146: #define qinsertq(q, p) \
147: ((q)->qforw == (q) ? 0 : \
148: ((q)->qback->qforw = (p)->qforw, \
149: (p)->qforw->qback = (q)->qback, \
150: (q)->qforw->qback = (p), \
151: (p)->qforw = (q)->qforw, \
152: (q)->qforw = (q), \
153: (q)->qback = (q)))
154:
155: #define H (14)
156: #define HSIZE (1 << H)
157: #define hash(h, c) ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
158:
159: char *cc_buffer;
160: struct cc **cc_output; /* the output array */
161: short *cc_places[TOKEN_MAX + 1];
162: short *cc_hashcodes; /* for computing hashcodes */
163: struct cc **cc_htab; /* the hash table */
164: struct cc **cc_tokens; /* holds all the active tokens */
165: struct cc_undo {
166: struct cc **pos;
167: struct cc *val;
168: } *cc_undo;
169:
170: long cc_time, cc_time0;
171:
172: char *cc_tt_ob, *cc_tt_obe;
173:
174: ccinit()
175: {
1.5 ! mpech 176: int i, j;
! 177: struct cc *p;
1.1 deraadt 178:
179: if (tt.tt_token_max > cc_token_max)
180: tt.tt_token_max = cc_token_max;
181: if (tt.tt_token_min < cc_token_min)
182: tt.tt_token_min = cc_token_min;
183: if (tt.tt_token_min > tt.tt_token_max) {
184: tt.tt_ntoken = 0;
185: return 0;
186: }
187: if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */
188: tt.tt_ntoken = cc_ntoken / 2;
189: #define C(x) (sizeof (x) / sizeof *(x))
190: for (i = 0; i < C(cc_thresholds); i++) {
191: int h = i - tt.tt_put_token_cost;
192: if (h > 0)
193: cc_thresholds[i] =
194: (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
195: else
196: cc_thresholds[i] = 0;
197: }
198: for (i = 0; i < C(cc_score_adjustments); i++) {
199: int t = cc_thresholds[i];
200: for (j = 0; j < C(*cc_score_adjustments); j++) {
201: if (j >= t)
202: cc_score_adjustments[i][j] =
203: - (i - tt.tt_put_token_cost);
204: else if (j < t - 1)
205: cc_score_adjustments[i][j] = 0;
206: else
207: /*
208: * cost now is
209: * length * (ccount + 1) a
210: * cost before was
211: * set-token-cost + length +
212: * ccount * put-token-cost b
213: * the score adjustment is (b - a)
214: */
215: cc_score_adjustments[i][j] =
216: tt.tt_set_token_cost + i +
217: j * tt.tt_put_token_cost -
218: i * (j + 1);
219: if (j >= t)
220: cc_initial_scores[i][j] = 0;
221: else
222: /*
223: * - (set-token-cost +
224: * (length - put-token-cost) -
225: * (length - put-token-cost) * ccount)
226: */
227: cc_initial_scores[i][j] =
228: - (tt.tt_set_token_cost +
229: (i - tt.tt_put_token_cost) -
230: (i - tt.tt_put_token_cost) * j);
231: }
232: }
233: #ifndef cc_weight
234: for (i = 1; i < C(cc_wthresholds); i++) {
235: cc_wthresholds[i] =
236: ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
237: i / 5 + 1) *
238: cc_weight + 1;
239: cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
240: }
241: #endif
242: #undef C
243: if ((cc_output = (struct cc **)
1.4 millert 244: malloc(cc_bufsize * sizeof *cc_output)) == 0)
1.1 deraadt 245: goto nomem;
246: if ((cc_hashcodes = (short *)
1.4 millert 247: malloc(cc_bufsize * sizeof *cc_hashcodes)) == 0)
1.1 deraadt 248: goto nomem;
249: if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
250: goto nomem;
251: if ((cc_tokens = (struct cc **)
1.4 millert 252: malloc((cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
1.1 deraadt 253: sizeof *cc_tokens)) == 0)
254: goto nomem;
255: if ((cc_undo = (struct cc_undo *)
1.4 millert 256: malloc(cc_bufsize * sizeof *cc_undo)) == 0)
1.1 deraadt 257: goto nomem;
258: for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
259: if ((cc_places[i] = (short *)
1.4 millert 260: malloc(cc_bufsize * sizeof **cc_places)) == 0)
1.1 deraadt 261: goto nomem;
262: cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
263: cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
264: cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
265: cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
1.4 millert 266: if ((p = (struct cc *) malloc(cc_ntoken * sizeof *p)) == 0)
1.1 deraadt 267: goto nomem;
268: for (i = 0; i < tt.tt_ntoken; i++) {
269: p->code = i;
270: p->time = -1;
271: p->qback = cc_q0a.qback;
272: p->qforw = &cc_q0a;
273: p->qback->qforw = p;
274: cc_q0a.qback = p;
275: p++;
276: }
277: for (; i < cc_ntoken; i++) {
278: p->code = -1;
279: p->time = -1;
280: p->qback = cc_q1a.qback;
281: p->qforw = &cc_q1a;
282: p->qback->qforw = p;
283: cc_q1a.qback = p;
284: p++;
285: }
286: cc_tt_ob = tt_ob;
287: cc_tt_obe = tt_obe;
1.4 millert 288: if ((cc_buffer = malloc(cc_bufsize)) == 0)
1.1 deraadt 289: goto nomem;
290: return 0;
291: nomem:
292: wwerrno = WWE_NOMEM;
293: return -1;
294: }
295:
296: ccstart()
297: {
298: int ccflush();
299:
300: ttflush();
301: tt_obp = tt_ob = cc_buffer;
302: tt_obe = tt_ob + cc_bufsize;
303: tt.tt_flush = ccflush;
304: if (cc_trace) {
305: cc_trace_fp = fopen("window-trace", "a");
306: (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
307: }
308: ccreset();
309: }
310:
311: ccreset()
312: {
1.5 ! mpech 313: struct cc *p;
1.1 deraadt 314:
315: bzero((char *) cc_htab, HSIZE * sizeof *cc_htab);
316: for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
317: p->hback = 0;
318: for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
319: p->hback = 0;
320: }
321:
322: ccend()
323: {
324:
325: ttflush();
326: tt_obp = tt_ob = cc_tt_ob;
327: tt_obe = cc_tt_obe;
328: tt.tt_flush = 0;
329: if (cc_trace_fp != NULL) {
330: (void) fclose(cc_trace_fp);
331: cc_trace_fp = NULL;
332: }
333: }
334:
335: ccflush()
336: {
337: int bufsize = tt_obp - tt_ob;
338: int n;
339:
340: if (tt_ob != cc_buffer)
341: abort();
342: if (cc_trace_fp != NULL) {
343: (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
344: (void) putc(-1, cc_trace_fp);
345: }
346: tt.tt_flush = 0;
347: (*tt.tt_compress)(1);
348: if (bufsize < tt.tt_token_min) {
349: ttflush();
350: goto out;
351: }
352: tt_obp = tt_ob = cc_tt_ob;
353: tt_obe = cc_tt_obe;
354: cc_time0 = cc_time;
355: cc_time += bufsize;
356: n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
357: cc_compress_phase(cc_output, bufsize, cc_tokens, n);
358: cc_output_phase(cc_buffer, cc_output, bufsize);
359: ttflush();
360: tt_obp = tt_ob = cc_buffer;
361: tt_obe = cc_buffer + cc_bufsize;
362: out:
363: (*tt.tt_compress)(0);
364: tt.tt_flush = ccflush;
365: }
366:
367: cc_sweep_phase(buffer, bufsize, tokens)
368: char *buffer;
369: struct cc **tokens;
370: {
1.5 ! mpech 371: struct cc **pp = tokens;
! 372: int i, n;
1.1 deraadt 373: #ifdef STATS
374: int nn, ii;
375: #endif
376:
377: #ifdef STATS
378: if (verbose >= 0)
379: time_begin();
380: if (verbose > 0)
381: printf("Sweep:");
382: #endif
383: cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
384: #ifdef STATS
385: ntoken_stat = 0;
386: nn = 0;
387: ii = 0;
388: #endif
389: for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
390: #ifdef STATS
391: if (verbose > 0) {
392: if (ii > 7) {
393: printf("\n ");
394: ii = 0;
395: }
396: ii++;
397: printf(" (%d", i);
398: (void) fflush(stdout);
399: }
400: #endif
401: n = cc_sweep(buffer, bufsize, pp, i);
402: pp += n;
403: #ifdef STATS
404: if (verbose > 0) {
405: if (--n > 0) {
406: printf(" %d", n);
407: nn += n;
408: }
409: putchar(')');
410: }
411: #endif
412: }
413: qinsertq(&cc_q1b, &cc_q1a);
414: #ifdef STATS
415: if (verbose > 0)
416: printf("\n %d tokens, %d candidates\n",
417: ntoken_stat, nn);
418: if (verbose >= 0)
419: time_end();
420: #endif
421: return pp - tokens;
422: }
423:
424: cc_sweep0(buffer, n, length)
425: char *buffer;
426: {
1.5 ! mpech 427: char *p;
! 428: short *hc;
! 429: int i;
! 430: short c;
! 431: short pc = tt.tt_padc;
1.1 deraadt 432:
433: /* n and length are at least 1 */
434: p = buffer++;
435: hc = cc_hashcodes;
436: i = n;
437: do {
438: if ((*hc++ = *p++) == pc)
439: hc[-1] = -1;
440: } while (--i);
441: while (--length) {
442: p = buffer++;
443: hc = cc_hashcodes;
444: for (i = n--; --i;) {
445: if ((c = *p++) == pc || *hc < 0)
446: c = -1;
447: else
448: c = hash(*hc, c);
449: *hc++ = c;
450: }
451: }
452: }
453:
454: cc_sweep(buffer, bufsize, tokens, length)
455: char *buffer;
456: struct cc **tokens;
1.5 ! mpech 457: int length;
1.1 deraadt 458: {
1.5 ! mpech 459: struct cc *p;
! 460: char *cp;
! 461: int i;
1.1 deraadt 462: short *hc;
463: short *places = cc_places[length];
464: struct cc **pp = tokens;
465: short threshold = thresh(length);
466: #ifndef cc_weight
467: short wthreshold = wthresh(length);
468: short limit = wlimit(length);
469: #endif
470: int time;
471: short pc = tt.tt_padc;
472:
473: i = length - 1;
474: bufsize -= i;
475: cp = buffer + i;
476: hc = cc_hashcodes;
477: time = cc_time0;
478: for (i = 0; i < bufsize; i++, time++) {
479: struct cc **h;
480:
481: {
1.5 ! mpech 482: short *hc1 = hc;
! 483: short c = *cp++;
! 484: short hh;
1.1 deraadt 485: if ((hh = *hc1) < 0 || c == pc) {
486: *hc1++ = -1;
487: hc = hc1;
488: continue;
489: }
490: h = cc_htab + (*hc1++ = hash(hh, c));
491: hc = hc1;
492: }
493: for (p = *h; p != 0; p = p->hforw)
494: if (p->length == (char) length) {
1.5 ! mpech 495: char *p1 = p->string;
! 496: char *p2 = cp - length;
! 497: int n = length;
1.1 deraadt 498: do
499: if (*p1++ != *p2++)
500: goto fail;
501: while (--n);
502: break;
503: fail:;
504: }
505: if (p == 0) {
506: p = cc_q1a.qback;
507: if (p == &cc_q1a ||
508: p->time >= cc_time0 && p->length == (char) length)
509: continue;
510: if (p->hback != 0)
511: if ((*p->hback = p->hforw) != 0)
512: p->hforw->hback = p->hback;
513: {
1.5 ! mpech 514: char *p1 = p->string;
! 515: char *p2 = cp - length;
! 516: int n = length;
1.1 deraadt 517: do
518: *p1++ = *p2++;
519: while (--n);
520: }
521: p->length = length;
522: #ifndef cc_weight
523: p->weight = cc_weight;
524: #endif
525: p->time = time;
526: p->bcount = 1;
527: p->ccount = 0;
528: p->flag = 0;
529: if ((p->hforw = *h) != 0)
530: p->hforw->hback = &p->hforw;
531: *h = p;
532: p->hback = h;
533: qinsert(p, &cc_q1a);
534: places[i] = -1;
535: p->places = i;
536: #ifdef STATS
537: ntoken_stat++;
538: #endif
539: } else if (p->time < cc_time0) {
540: #ifndef cc_weight
541: if ((p->weight += p->time - time) < 0)
542: p->weight = cc_weight;
543: else if ((p->weight += cc_weight) > limit)
544: p->weight = limit;
545: #endif
546: p->time = time;
547: p->bcount = 1;
548: p->ccount = 0;
549: if (p->code >= 0) {
550: p->flag = 1;
551: *pp++ = p;
552: } else
553: #ifndef cc_weight
554: if (p->weight >= wthreshold) {
555: p->flag = 1;
556: *pp++ = p;
557: qinsert(p, &cc_q1b);
558: } else
559: #endif
560: {
561: p->flag = 0;
562: qinsert(p, &cc_q1a);
563: }
564: places[i] = -1;
565: p->places = i;
566: #ifdef STATS
567: ntoken_stat++;
568: #endif
569: } else if (p->time + length > time) {
570: /*
571: * overlapping token, don't count as two and
572: * don't update time, but do adjust weight to offset
573: * the difference
574: */
575: #ifndef cc_weight
576: if (cc_weight != 0) { /* XXX */
577: p->weight += time - p->time;
578: if (!p->flag && p->weight >= wthreshold) {
579: p->flag = 1;
580: *pp++ = p;
581: qinsert(p, &cc_q1b);
582: }
583: }
584: #endif
585: places[i] = p->places;
586: p->places = i;
587: } else {
588: #ifndef cc_weight
589: if ((p->weight += p->time - time) < 0)
590: p->weight = cc_weight;
591: else if ((p->weight += cc_weight) > limit)
592: p->weight = limit;
593: #endif
594: p->time = time;
595: p->bcount++;
596: if (!p->flag &&
597: /* code must be < 0 if flag false here */
598: (p->bcount >= threshold
599: #ifndef cc_weight
600: || p->weight >= wthreshold
601: #endif
602: )) {
603: p->flag = 1;
604: *pp++ = p;
605: qinsert(p, &cc_q1b);
606: }
607: places[i] = p->places;
608: p->places = i;
609: }
610: }
611: if ((i = pp - tokens) > 0) {
612: *pp = 0;
613: if (cc_reverse)
614: cc_sweep_reverse(tokens, places);
615: if (cc_sort && i > 1) {
616: int cc_token_compare();
617: qsort((char *) tokens, i, sizeof *tokens,
618: cc_token_compare);
619: }
620: if (cc_chop) {
621: if ((i = i * cc_chop / 100) == 0)
622: i = 1;
623: tokens[i] = 0;
624: }
625: i++;
626: }
627: return i;
628: }
629:
630: cc_sweep_reverse(pp, places)
1.5 ! mpech 631: struct cc **pp;
! 632: short *places;
1.1 deraadt 633: {
1.5 ! mpech 634: struct cc *p;
! 635: short front, back, t;
1.1 deraadt 636:
637: while ((p = *pp++) != 0) {
638: back = -1;
639: t = p->places;
640: /* the list is never empty */
641: do {
642: front = places[t];
643: places[t] = back;
644: back = t;
645: } while ((t = front) >= 0);
646: p->places = back;
647: }
648: }
649:
650: cc_compress_phase(output, bufsize, tokens, ntoken)
651: struct cc **output;
652: struct cc **tokens;
653: {
1.5 ! mpech 654: int i;
1.1 deraadt 655:
656: bzero((char *) output, bufsize * sizeof *output);
657: for (i = 0; i < cc_npass0; i++)
658: cc_compress_phase1(output, tokens, ntoken, 0);
659: for (i = 0; i < cc_npass1; i++)
660: cc_compress_phase1(output, tokens, ntoken, 1);
661: cc_compress_cleanup(output, bufsize);
662: }
663:
664: cc_compress_phase1(output, tokens, ntoken, flag)
1.5 ! mpech 665: struct cc **output;
1.1 deraadt 666: struct cc **tokens;
667: {
1.5 ! mpech 668: struct cc **pp;
1.1 deraadt 669: #ifdef STATS
1.5 ! mpech 670: int i = 0;
1.1 deraadt 671: int nt = 0, cc = 0, nc = 0;
672: #endif
673:
674: #ifdef STATS
675: if (verbose >= 0)
676: time_begin();
677: if (verbose > 0)
678: printf("Compress:");
679: #endif
680: pp = tokens;
681: while (pp < tokens + ntoken) {
682: #ifdef STATS
683: if (verbose > 0) {
684: ntoken_stat = 0;
685: ccount_stat = 0;
686: ncover_stat = 0;
687: if (i > 2) {
688: printf("\n ");
689: i = 0;
690: }
691: i++;
692: printf(" (%d", (*pp)->length);
693: (void) fflush(stdout);
694: }
695: #endif
696: pp += cc_compress(output, pp, flag);
697: #ifdef STATS
698: if (verbose > 0) {
699: printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
700: ncover_stat);
701: nt += ntoken_stat;
702: cc += ccount_stat;
703: nc += ncover_stat;
704: }
705: #endif
706: }
707: #ifdef STATS
708: if (verbose > 0)
709: printf("\n total: (%dt %du %dc)\n", nt, cc, nc);
710: if (verbose >= 0)
711: time_end();
712: #endif
713: }
714:
715: cc_compress_cleanup(output, bufsize)
1.5 ! mpech 716: struct cc **output;
1.1 deraadt 717: {
1.5 ! mpech 718: struct cc **end;
1.1 deraadt 719:
720: /* the previous output phase may have been interrupted */
721: qinsertq(&cc_q0b, &cc_q0a);
722: for (end = output + bufsize; output < end;) {
1.5 ! mpech 723: struct cc *p;
! 724: int length;
1.1 deraadt 725: if ((p = *output) == 0) {
726: output++;
727: continue;
728: }
729: length = p->length;
730: if (!p->flag) {
731: } else if (p->code >= 0) {
732: qinsert(p, &cc_q0b);
733: p->flag = 0;
734: } else if (p->ccount == 0) {
735: *output = 0;
736: } else if (p->ccount >= thresh(length)
737: #ifndef cc_weight
738: || wthreshp(p->weight, length)
739: #endif
740: ) {
741: p->flag = 0;
742: } else {
743: p->ccount = 0;
744: *output = 0;
745: }
746: output += length;
747: }
748: }
749:
750: cc_compress(output, tokens, flag)
751: struct cc **output;
752: struct cc **tokens;
753: char flag;
754: {
755: struct cc **pp = tokens;
1.5 ! mpech 756: struct cc *p = *pp++;
1.1 deraadt 757: int length = p->length;
758: int threshold = thresh(length);
759: #ifndef cc_weight
760: short wthreshold = wthresh(length);
761: #endif
762: short *places = cc_places[length];
763: int *initial_scores = cc_initial_scores[length];
764: int initial_score0 = put_token_score(length);
765:
766: do {
767: int score;
1.5 ! mpech 768: struct cc_undo *undop;
1.1 deraadt 769: int ccount;
770: #ifdef STATS
771: int ncover;
772: #endif
773: int i;
774:
775: ccount = p->ccount;
776: if ((short) ccount >= p->bcount)
777: continue;
778: if (p->code >= 0 || ccount >= threshold)
779: score = 0;
780: #ifndef cc_weight
781: else if (p->weight >= wthreshold)
782: /* allow one fewer match than normal */
783: /* XXX, should adjust for ccount */
784: score = - tt.tt_set_token_cost;
785: #endif
786: else
787: score = initial_scores[ccount];
788: undop = cc_undo;
789: #ifdef STATS
790: ncover = 0;
791: #endif
792: for (i = p->places; i >= 0; i = places[i]) {
1.5 ! mpech 793: struct cc **jp;
! 794: struct cc *x;
! 795: struct cc **ip = output + i;
! 796: int score0 = initial_score0;
1.1 deraadt 797: struct cc **iip = ip + length;
798: struct cc_undo *undop1 = undop;
799:
800: if ((x = *(jp = ip)) != 0)
801: goto z;
802: while (--jp >= output)
803: if ((x = *jp) != 0) {
804: if (jp + x->length > ip)
805: goto z;
806: break;
807: }
808: jp = ip + 1;
809: while (jp < iip) {
810: if ((x = *jp) == 0) {
811: jp++;
812: continue;
813: }
814: z:
815: if (x == p)
816: goto undo;
817: #ifdef STATS
818: ncover++;
819: #endif
820: undop->pos = jp;
821: undop->val = x;
822: undop++;
823: *jp = 0;
824: x->ccount--;
825: score_adjust(score0, x);
826: if (score0 < 0 && flag)
827: goto undo;
828: jp += x->length;
829: }
830: undop->pos = ip;
831: undop->val = 0;
832: undop++;
833: *ip = p;
834: ccount++;
835: score += score0;
836: continue;
837: undo:
838: while (--undop >= undop1)
839: if (*undop->pos = x = undop->val)
840: x->ccount++;
841: undop++;
842: }
843: if (score > 0) {
844: #ifdef STATS
845: ccount_stat += ccount - p->ccount;
846: ntoken_stat++;
847: ncover_stat += ncover;
848: #endif
849: p->ccount = ccount;
850: } else {
1.5 ! mpech 851: struct cc_undo *u = cc_undo;
1.1 deraadt 852: while (--undop >= u) {
1.5 ! mpech 853: struct cc *x;
1.1 deraadt 854: if (*undop->pos = x = undop->val)
855: x->ccount++;
856: }
857: }
858: } while ((p = *pp++) != 0);
859: return pp - tokens;
860: }
861:
862: cc_output_phase(buffer, output, bufsize)
1.5 ! mpech 863: char *buffer;
! 864: struct cc **output;
! 865: int bufsize;
1.1 deraadt 866: {
1.5 ! mpech 867: int i;
! 868: struct cc *p, *p1;
1.1 deraadt 869:
870: for (i = 0; i < bufsize;) {
871: if ((p = output[i]) == 0) {
872: ttputc(buffer[i]);
873: i++;
874: } else if (p->code >= 0) {
875: if (--p->ccount == 0)
876: qinsert(p, &cc_q0a);
877: (*tt.tt_put_token)(p->code, p->string, p->length);
878: wwntokuse++;
879: wwntoksave += put_token_score(p->length);
880: i += p->length;
881: } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
882: p->code = p1->code;
883: p1->code = -1;
884: qinsert(p1, &cc_q1a);
885: if (--p->ccount == 0)
886: qinsert(p, &cc_q0a);
887: else
888: qinsert(p, &cc_q0b);
889: (*tt.tt_set_token)(p->code, p->string, p->length);
890: wwntokdef++;
891: wwntoksave -= tt.tt_set_token_cost;
892: i += p->length;
893: } else {
894: p->ccount--;
895: ttwrite(p->string, p->length);
896: wwntokbad++;
897: i += p->length;
898: }
899: }
900: wwntokc += bufsize;
901: }
902:
903: cc_token_compare(p1, p2)
904: struct cc **p1, **p2;
905: {
906: return (*p2)->bcount - (*p1)->bcount;
907: }