Annotation of src/usr.bin/window/compress.c, Revision 1.8
1.8 ! deraadt 1: /* $OpenBSD: compress.c,v 1.7 2003/07/10 00:06:52 david Exp $ */
1.1 deraadt 2: /* $NetBSD: compress.c,v 1.3 1995/09/28 10:34:13 tls Exp $ */
3:
4: /*
5: * Copyright (c) 1989, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * Edward Wang at The University of California, Berkeley.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
1.6 millert 19: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
36: #ifndef lint
37: #if 0
38: static char sccsid[] = "@(#)compress.c 8.1 (Berkeley) 6/6/93";
39: #else
1.8 ! deraadt 40: static char rcsid[] = "$OpenBSD: compress.c,v 1.7 2003/07/10 00:06:52 david Exp $";
1.1 deraadt 41: #endif
42: #endif /* not lint */
1.3 downsj 43:
44: #include <stdlib.h>
1.7 david 45: #include <string.h>
1.1 deraadt 46:
47: #include "ww.h"
48: #include "tt.h"
49:
50: /* special */
51: #include <stdio.h>
52: #include <fcntl.h>
53: int cc_trace = 0;
54: FILE *cc_trace_fp;
55:
56: /* tunable parameters */
57:
58: int cc_reverse = 1;
59: int cc_sort = 0;
60: int cc_chop = 0;
61:
62: int cc_token_max = 8; /* <= TOKEN_MAX */
63: int cc_token_min = 2; /* > tt.tt_put_token_cost */
64: int cc_npass0 = 1;
65: int cc_npass1 = 1;
66:
67: int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */
68:
69: int cc_ntoken = 8192;
70:
71: #define cc_weight XXX
72: #ifndef cc_weight
73: int cc_weight = 0;
74: #endif
75:
76: #define TOKEN_MAX 16
77:
78: struct cc {
79: char string[TOKEN_MAX];
80: char length;
81: char flag;
82: #ifndef cc_weight
83: short weight;
84: #endif
85: long time; /* time last seen */
86: short bcount; /* count in this buffer */
87: short ccount; /* count in compression */
88: short places; /* places in the buffer */
89: short code; /* token code */
90: struct cc *qforw, *qback;
91: struct cc *hforw, **hback;
92: };
93:
94: short cc_thresholds[TOKEN_MAX + 1];
95: #define thresh(length) (cc_thresholds[length])
96: #define threshp(code, count, length) \
97: ((code) >= 0 || (short) (count) >= cc_thresholds[length])
98:
99: #ifndef cc_weight
100: short cc_wthresholds[TOKEN_MAX + 1];
101: #define wthresh(length) (cc_wthresholds[length])
102: #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
103: #else
104: #define wthreshp(weight, length) (0)
105: #endif
106:
107: #ifndef cc_weight
108: short cc_wlimits[TOKEN_MAX + 1];
109: #define wlimit(length) (cc_wlimits[length])
110: #endif
111:
112: #define put_token_score(length) ((length) - tt.tt_put_token_cost)
113:
114: int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
115: #define score_adjust(score, p) \
116: do { \
117: int length = (p)->length; \
118: int ccount = (p)->ccount; \
119: if (threshp((p)->code, ccount, length) || \
120: wthreshp((p)->weight, length)) /* XXX */ \
121: (score) -= length - tt.tt_put_token_cost; \
122: else \
123: (score) += cc_score_adjustments[length][ccount]; \
124: } while (0)
125:
126: int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
127:
128: struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
129:
130: #define qinsert(p1, p2) \
131: do { \
1.5 mpech 132: struct cc *forw = (p1)->qforw; \
133: struct cc *back = (p1)->qback; \
1.1 deraadt 134: back->qforw = forw; \
135: forw->qback = back; \
136: forw = (p2)->qforw; \
137: (p1)->qforw = forw; \
138: forw->qback = (p1); \
139: (p2)->qforw = (p1); \
140: (p1)->qback = (p2); \
141: } while (0)
142:
143: #define qinsertq(q, p) \
144: ((q)->qforw == (q) ? 0 : \
145: ((q)->qback->qforw = (p)->qforw, \
146: (p)->qforw->qback = (q)->qback, \
147: (q)->qforw->qback = (p), \
148: (p)->qforw = (q)->qforw, \
149: (q)->qforw = (q), \
150: (q)->qback = (q)))
151:
152: #define H (14)
153: #define HSIZE (1 << H)
154: #define hash(h, c) ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
155:
156: char *cc_buffer;
157: struct cc **cc_output; /* the output array */
158: short *cc_places[TOKEN_MAX + 1];
159: short *cc_hashcodes; /* for computing hashcodes */
160: struct cc **cc_htab; /* the hash table */
161: struct cc **cc_tokens; /* holds all the active tokens */
162: struct cc_undo {
163: struct cc **pos;
164: struct cc *val;
165: } *cc_undo;
166:
167: long cc_time, cc_time0;
168:
169: char *cc_tt_ob, *cc_tt_obe;
170:
171: ccinit()
172: {
1.5 mpech 173: int i, j;
174: struct cc *p;
1.1 deraadt 175:
176: if (tt.tt_token_max > cc_token_max)
177: tt.tt_token_max = cc_token_max;
178: if (tt.tt_token_min < cc_token_min)
179: tt.tt_token_min = cc_token_min;
180: if (tt.tt_token_min > tt.tt_token_max) {
181: tt.tt_ntoken = 0;
182: return 0;
183: }
184: if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */
185: tt.tt_ntoken = cc_ntoken / 2;
186: #define C(x) (sizeof (x) / sizeof *(x))
187: for (i = 0; i < C(cc_thresholds); i++) {
188: int h = i - tt.tt_put_token_cost;
189: if (h > 0)
190: cc_thresholds[i] =
191: (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
192: else
193: cc_thresholds[i] = 0;
194: }
195: for (i = 0; i < C(cc_score_adjustments); i++) {
196: int t = cc_thresholds[i];
197: for (j = 0; j < C(*cc_score_adjustments); j++) {
198: if (j >= t)
199: cc_score_adjustments[i][j] =
200: - (i - tt.tt_put_token_cost);
201: else if (j < t - 1)
202: cc_score_adjustments[i][j] = 0;
203: else
204: /*
205: * cost now is
206: * length * (ccount + 1) a
207: * cost before was
208: * set-token-cost + length +
209: * ccount * put-token-cost b
210: * the score adjustment is (b - a)
211: */
212: cc_score_adjustments[i][j] =
213: tt.tt_set_token_cost + i +
214: j * tt.tt_put_token_cost -
215: i * (j + 1);
216: if (j >= t)
217: cc_initial_scores[i][j] = 0;
218: else
219: /*
220: * - (set-token-cost +
221: * (length - put-token-cost) -
222: * (length - put-token-cost) * ccount)
223: */
224: cc_initial_scores[i][j] =
225: - (tt.tt_set_token_cost +
226: (i - tt.tt_put_token_cost) -
227: (i - tt.tt_put_token_cost) * j);
228: }
229: }
230: #ifndef cc_weight
231: for (i = 1; i < C(cc_wthresholds); i++) {
232: cc_wthresholds[i] =
233: ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
234: i / 5 + 1) *
235: cc_weight + 1;
236: cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
237: }
238: #endif
239: #undef C
240: if ((cc_output = (struct cc **)
1.8 ! deraadt 241: calloc(cc_bufsize, sizeof *cc_output)) == 0)
1.1 deraadt 242: goto nomem;
243: if ((cc_hashcodes = (short *)
1.8 ! deraadt 244: calloc(cc_bufsize, sizeof *cc_hashcodes)) == 0)
1.1 deraadt 245: goto nomem;
1.8 ! deraadt 246: if ((cc_htab = (struct cc **) calloc(HSIZE, sizeof *cc_htab)) == 0)
1.1 deraadt 247: goto nomem;
248: if ((cc_tokens = (struct cc **)
1.8 ! deraadt 249: malloc(cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1),
! 250: sizeof *cc_tokens) == 0)
1.1 deraadt 251: goto nomem;
252: if ((cc_undo = (struct cc_undo *)
1.8 ! deraadt 253: calloc(cc_bufsize, sizeof *cc_undo)) == 0)
1.1 deraadt 254: goto nomem;
255: for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
256: if ((cc_places[i] = (short *)
1.8 ! deraadt 257: calloc(cc_bufsize, sizeof **cc_places)) == 0)
1.1 deraadt 258: goto nomem;
259: cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
260: cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
261: cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
262: cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
1.8 ! deraadt 263: if ((p = (struct cc *) calloc(cc_ntoken, sizeof *p)) == 0)
1.1 deraadt 264: goto nomem;
265: for (i = 0; i < tt.tt_ntoken; i++) {
266: p->code = i;
267: p->time = -1;
268: p->qback = cc_q0a.qback;
269: p->qforw = &cc_q0a;
270: p->qback->qforw = p;
271: cc_q0a.qback = p;
272: p++;
273: }
274: for (; i < cc_ntoken; i++) {
275: p->code = -1;
276: p->time = -1;
277: p->qback = cc_q1a.qback;
278: p->qforw = &cc_q1a;
279: p->qback->qforw = p;
280: cc_q1a.qback = p;
281: p++;
282: }
283: cc_tt_ob = tt_ob;
284: cc_tt_obe = tt_obe;
1.4 millert 285: if ((cc_buffer = malloc(cc_bufsize)) == 0)
1.1 deraadt 286: goto nomem;
287: return 0;
288: nomem:
289: wwerrno = WWE_NOMEM;
290: return -1;
291: }
292:
293: ccstart()
294: {
295: int ccflush();
296:
297: ttflush();
298: tt_obp = tt_ob = cc_buffer;
299: tt_obe = tt_ob + cc_bufsize;
300: tt.tt_flush = ccflush;
301: if (cc_trace) {
302: cc_trace_fp = fopen("window-trace", "a");
303: (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
304: }
305: ccreset();
306: }
307:
308: ccreset()
309: {
1.5 mpech 310: struct cc *p;
1.1 deraadt 311:
312: bzero((char *) cc_htab, HSIZE * sizeof *cc_htab);
313: for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
314: p->hback = 0;
315: for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
316: p->hback = 0;
317: }
318:
319: ccend()
320: {
321:
322: ttflush();
323: tt_obp = tt_ob = cc_tt_ob;
324: tt_obe = cc_tt_obe;
325: tt.tt_flush = 0;
326: if (cc_trace_fp != NULL) {
327: (void) fclose(cc_trace_fp);
328: cc_trace_fp = NULL;
329: }
330: }
331:
332: ccflush()
333: {
334: int bufsize = tt_obp - tt_ob;
335: int n;
336:
337: if (tt_ob != cc_buffer)
338: abort();
339: if (cc_trace_fp != NULL) {
340: (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
341: (void) putc(-1, cc_trace_fp);
342: }
343: tt.tt_flush = 0;
344: (*tt.tt_compress)(1);
345: if (bufsize < tt.tt_token_min) {
346: ttflush();
347: goto out;
348: }
349: tt_obp = tt_ob = cc_tt_ob;
350: tt_obe = cc_tt_obe;
351: cc_time0 = cc_time;
352: cc_time += bufsize;
353: n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
354: cc_compress_phase(cc_output, bufsize, cc_tokens, n);
355: cc_output_phase(cc_buffer, cc_output, bufsize);
356: ttflush();
357: tt_obp = tt_ob = cc_buffer;
358: tt_obe = cc_buffer + cc_bufsize;
359: out:
360: (*tt.tt_compress)(0);
361: tt.tt_flush = ccflush;
362: }
363:
364: cc_sweep_phase(buffer, bufsize, tokens)
365: char *buffer;
366: struct cc **tokens;
367: {
1.5 mpech 368: struct cc **pp = tokens;
369: int i, n;
1.1 deraadt 370: #ifdef STATS
371: int nn, ii;
372: #endif
373:
374: #ifdef STATS
375: if (verbose >= 0)
376: time_begin();
377: if (verbose > 0)
378: printf("Sweep:");
379: #endif
380: cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
381: #ifdef STATS
382: ntoken_stat = 0;
383: nn = 0;
384: ii = 0;
385: #endif
386: for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
387: #ifdef STATS
388: if (verbose > 0) {
389: if (ii > 7) {
390: printf("\n ");
391: ii = 0;
392: }
393: ii++;
394: printf(" (%d", i);
395: (void) fflush(stdout);
396: }
397: #endif
398: n = cc_sweep(buffer, bufsize, pp, i);
399: pp += n;
400: #ifdef STATS
401: if (verbose > 0) {
402: if (--n > 0) {
403: printf(" %d", n);
404: nn += n;
405: }
406: putchar(')');
407: }
408: #endif
409: }
410: qinsertq(&cc_q1b, &cc_q1a);
411: #ifdef STATS
412: if (verbose > 0)
413: printf("\n %d tokens, %d candidates\n",
414: ntoken_stat, nn);
415: if (verbose >= 0)
416: time_end();
417: #endif
418: return pp - tokens;
419: }
420:
421: cc_sweep0(buffer, n, length)
422: char *buffer;
423: {
1.5 mpech 424: char *p;
425: short *hc;
426: int i;
427: short c;
428: short pc = tt.tt_padc;
1.1 deraadt 429:
430: /* n and length are at least 1 */
431: p = buffer++;
432: hc = cc_hashcodes;
433: i = n;
434: do {
435: if ((*hc++ = *p++) == pc)
436: hc[-1] = -1;
437: } while (--i);
438: while (--length) {
439: p = buffer++;
440: hc = cc_hashcodes;
441: for (i = n--; --i;) {
442: if ((c = *p++) == pc || *hc < 0)
443: c = -1;
444: else
445: c = hash(*hc, c);
446: *hc++ = c;
447: }
448: }
449: }
450:
451: cc_sweep(buffer, bufsize, tokens, length)
452: char *buffer;
453: struct cc **tokens;
1.5 mpech 454: int length;
1.1 deraadt 455: {
1.5 mpech 456: struct cc *p;
457: char *cp;
458: int i;
1.1 deraadt 459: short *hc;
460: short *places = cc_places[length];
461: struct cc **pp = tokens;
462: short threshold = thresh(length);
463: #ifndef cc_weight
464: short wthreshold = wthresh(length);
465: short limit = wlimit(length);
466: #endif
467: int time;
468: short pc = tt.tt_padc;
469:
470: i = length - 1;
471: bufsize -= i;
472: cp = buffer + i;
473: hc = cc_hashcodes;
474: time = cc_time0;
475: for (i = 0; i < bufsize; i++, time++) {
476: struct cc **h;
477:
478: {
1.5 mpech 479: short *hc1 = hc;
480: short c = *cp++;
481: short hh;
1.1 deraadt 482: if ((hh = *hc1) < 0 || c == pc) {
483: *hc1++ = -1;
484: hc = hc1;
485: continue;
486: }
487: h = cc_htab + (*hc1++ = hash(hh, c));
488: hc = hc1;
489: }
490: for (p = *h; p != 0; p = p->hforw)
491: if (p->length == (char) length) {
1.5 mpech 492: char *p1 = p->string;
493: char *p2 = cp - length;
494: int n = length;
1.1 deraadt 495: do
496: if (*p1++ != *p2++)
497: goto fail;
498: while (--n);
499: break;
500: fail:;
501: }
502: if (p == 0) {
503: p = cc_q1a.qback;
504: if (p == &cc_q1a ||
505: p->time >= cc_time0 && p->length == (char) length)
506: continue;
507: if (p->hback != 0)
508: if ((*p->hback = p->hforw) != 0)
509: p->hforw->hback = p->hback;
510: {
1.5 mpech 511: char *p1 = p->string;
512: char *p2 = cp - length;
513: int n = length;
1.1 deraadt 514: do
515: *p1++ = *p2++;
516: while (--n);
517: }
518: p->length = length;
519: #ifndef cc_weight
520: p->weight = cc_weight;
521: #endif
522: p->time = time;
523: p->bcount = 1;
524: p->ccount = 0;
525: p->flag = 0;
526: if ((p->hforw = *h) != 0)
527: p->hforw->hback = &p->hforw;
528: *h = p;
529: p->hback = h;
530: qinsert(p, &cc_q1a);
531: places[i] = -1;
532: p->places = i;
533: #ifdef STATS
534: ntoken_stat++;
535: #endif
536: } else if (p->time < cc_time0) {
537: #ifndef cc_weight
538: if ((p->weight += p->time - time) < 0)
539: p->weight = cc_weight;
540: else if ((p->weight += cc_weight) > limit)
541: p->weight = limit;
542: #endif
543: p->time = time;
544: p->bcount = 1;
545: p->ccount = 0;
546: if (p->code >= 0) {
547: p->flag = 1;
548: *pp++ = p;
549: } else
550: #ifndef cc_weight
551: if (p->weight >= wthreshold) {
552: p->flag = 1;
553: *pp++ = p;
554: qinsert(p, &cc_q1b);
555: } else
556: #endif
557: {
558: p->flag = 0;
559: qinsert(p, &cc_q1a);
560: }
561: places[i] = -1;
562: p->places = i;
563: #ifdef STATS
564: ntoken_stat++;
565: #endif
566: } else if (p->time + length > time) {
567: /*
568: * overlapping token, don't count as two and
569: * don't update time, but do adjust weight to offset
570: * the difference
571: */
572: #ifndef cc_weight
573: if (cc_weight != 0) { /* XXX */
574: p->weight += time - p->time;
575: if (!p->flag && p->weight >= wthreshold) {
576: p->flag = 1;
577: *pp++ = p;
578: qinsert(p, &cc_q1b);
579: }
580: }
581: #endif
582: places[i] = p->places;
583: p->places = i;
584: } else {
585: #ifndef cc_weight
586: if ((p->weight += p->time - time) < 0)
587: p->weight = cc_weight;
588: else if ((p->weight += cc_weight) > limit)
589: p->weight = limit;
590: #endif
591: p->time = time;
592: p->bcount++;
593: if (!p->flag &&
594: /* code must be < 0 if flag false here */
595: (p->bcount >= threshold
596: #ifndef cc_weight
597: || p->weight >= wthreshold
598: #endif
599: )) {
600: p->flag = 1;
601: *pp++ = p;
602: qinsert(p, &cc_q1b);
603: }
604: places[i] = p->places;
605: p->places = i;
606: }
607: }
608: if ((i = pp - tokens) > 0) {
609: *pp = 0;
610: if (cc_reverse)
611: cc_sweep_reverse(tokens, places);
612: if (cc_sort && i > 1) {
613: int cc_token_compare();
614: qsort((char *) tokens, i, sizeof *tokens,
615: cc_token_compare);
616: }
617: if (cc_chop) {
618: if ((i = i * cc_chop / 100) == 0)
619: i = 1;
620: tokens[i] = 0;
621: }
622: i++;
623: }
624: return i;
625: }
626:
627: cc_sweep_reverse(pp, places)
1.5 mpech 628: struct cc **pp;
629: short *places;
1.1 deraadt 630: {
1.5 mpech 631: struct cc *p;
632: short front, back, t;
1.1 deraadt 633:
634: while ((p = *pp++) != 0) {
635: back = -1;
636: t = p->places;
637: /* the list is never empty */
638: do {
639: front = places[t];
640: places[t] = back;
641: back = t;
642: } while ((t = front) >= 0);
643: p->places = back;
644: }
645: }
646:
647: cc_compress_phase(output, bufsize, tokens, ntoken)
648: struct cc **output;
649: struct cc **tokens;
650: {
1.5 mpech 651: int i;
1.1 deraadt 652:
653: bzero((char *) output, bufsize * sizeof *output);
654: for (i = 0; i < cc_npass0; i++)
655: cc_compress_phase1(output, tokens, ntoken, 0);
656: for (i = 0; i < cc_npass1; i++)
657: cc_compress_phase1(output, tokens, ntoken, 1);
658: cc_compress_cleanup(output, bufsize);
659: }
660:
661: cc_compress_phase1(output, tokens, ntoken, flag)
1.5 mpech 662: struct cc **output;
1.1 deraadt 663: struct cc **tokens;
664: {
1.5 mpech 665: struct cc **pp;
1.1 deraadt 666: #ifdef STATS
1.5 mpech 667: int i = 0;
1.1 deraadt 668: int nt = 0, cc = 0, nc = 0;
669: #endif
670:
671: #ifdef STATS
672: if (verbose >= 0)
673: time_begin();
674: if (verbose > 0)
675: printf("Compress:");
676: #endif
677: pp = tokens;
678: while (pp < tokens + ntoken) {
679: #ifdef STATS
680: if (verbose > 0) {
681: ntoken_stat = 0;
682: ccount_stat = 0;
683: ncover_stat = 0;
684: if (i > 2) {
685: printf("\n ");
686: i = 0;
687: }
688: i++;
689: printf(" (%d", (*pp)->length);
690: (void) fflush(stdout);
691: }
692: #endif
693: pp += cc_compress(output, pp, flag);
694: #ifdef STATS
695: if (verbose > 0) {
696: printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
697: ncover_stat);
698: nt += ntoken_stat;
699: cc += ccount_stat;
700: nc += ncover_stat;
701: }
702: #endif
703: }
704: #ifdef STATS
705: if (verbose > 0)
706: printf("\n total: (%dt %du %dc)\n", nt, cc, nc);
707: if (verbose >= 0)
708: time_end();
709: #endif
710: }
711:
712: cc_compress_cleanup(output, bufsize)
1.5 mpech 713: struct cc **output;
1.1 deraadt 714: {
1.5 mpech 715: struct cc **end;
1.1 deraadt 716:
717: /* the previous output phase may have been interrupted */
718: qinsertq(&cc_q0b, &cc_q0a);
719: for (end = output + bufsize; output < end;) {
1.5 mpech 720: struct cc *p;
721: int length;
1.1 deraadt 722: if ((p = *output) == 0) {
723: output++;
724: continue;
725: }
726: length = p->length;
727: if (!p->flag) {
728: } else if (p->code >= 0) {
729: qinsert(p, &cc_q0b);
730: p->flag = 0;
731: } else if (p->ccount == 0) {
732: *output = 0;
733: } else if (p->ccount >= thresh(length)
734: #ifndef cc_weight
735: || wthreshp(p->weight, length)
736: #endif
737: ) {
738: p->flag = 0;
739: } else {
740: p->ccount = 0;
741: *output = 0;
742: }
743: output += length;
744: }
745: }
746:
747: cc_compress(output, tokens, flag)
748: struct cc **output;
749: struct cc **tokens;
750: char flag;
751: {
752: struct cc **pp = tokens;
1.5 mpech 753: struct cc *p = *pp++;
1.1 deraadt 754: int length = p->length;
755: int threshold = thresh(length);
756: #ifndef cc_weight
757: short wthreshold = wthresh(length);
758: #endif
759: short *places = cc_places[length];
760: int *initial_scores = cc_initial_scores[length];
761: int initial_score0 = put_token_score(length);
762:
763: do {
764: int score;
1.5 mpech 765: struct cc_undo *undop;
1.1 deraadt 766: int ccount;
767: #ifdef STATS
768: int ncover;
769: #endif
770: int i;
771:
772: ccount = p->ccount;
773: if ((short) ccount >= p->bcount)
774: continue;
775: if (p->code >= 0 || ccount >= threshold)
776: score = 0;
777: #ifndef cc_weight
778: else if (p->weight >= wthreshold)
779: /* allow one fewer match than normal */
780: /* XXX, should adjust for ccount */
781: score = - tt.tt_set_token_cost;
782: #endif
783: else
784: score = initial_scores[ccount];
785: undop = cc_undo;
786: #ifdef STATS
787: ncover = 0;
788: #endif
789: for (i = p->places; i >= 0; i = places[i]) {
1.5 mpech 790: struct cc **jp;
791: struct cc *x;
792: struct cc **ip = output + i;
793: int score0 = initial_score0;
1.1 deraadt 794: struct cc **iip = ip + length;
795: struct cc_undo *undop1 = undop;
796:
797: if ((x = *(jp = ip)) != 0)
798: goto z;
799: while (--jp >= output)
800: if ((x = *jp) != 0) {
801: if (jp + x->length > ip)
802: goto z;
803: break;
804: }
805: jp = ip + 1;
806: while (jp < iip) {
807: if ((x = *jp) == 0) {
808: jp++;
809: continue;
810: }
811: z:
812: if (x == p)
813: goto undo;
814: #ifdef STATS
815: ncover++;
816: #endif
817: undop->pos = jp;
818: undop->val = x;
819: undop++;
820: *jp = 0;
821: x->ccount--;
822: score_adjust(score0, x);
823: if (score0 < 0 && flag)
824: goto undo;
825: jp += x->length;
826: }
827: undop->pos = ip;
828: undop->val = 0;
829: undop++;
830: *ip = p;
831: ccount++;
832: score += score0;
833: continue;
834: undo:
835: while (--undop >= undop1)
836: if (*undop->pos = x = undop->val)
837: x->ccount++;
838: undop++;
839: }
840: if (score > 0) {
841: #ifdef STATS
842: ccount_stat += ccount - p->ccount;
843: ntoken_stat++;
844: ncover_stat += ncover;
845: #endif
846: p->ccount = ccount;
847: } else {
1.5 mpech 848: struct cc_undo *u = cc_undo;
1.1 deraadt 849: while (--undop >= u) {
1.5 mpech 850: struct cc *x;
1.1 deraadt 851: if (*undop->pos = x = undop->val)
852: x->ccount++;
853: }
854: }
855: } while ((p = *pp++) != 0);
856: return pp - tokens;
857: }
858:
859: cc_output_phase(buffer, output, bufsize)
1.5 mpech 860: char *buffer;
861: struct cc **output;
862: int bufsize;
1.1 deraadt 863: {
1.5 mpech 864: int i;
865: struct cc *p, *p1;
1.1 deraadt 866:
867: for (i = 0; i < bufsize;) {
868: if ((p = output[i]) == 0) {
869: ttputc(buffer[i]);
870: i++;
871: } else if (p->code >= 0) {
872: if (--p->ccount == 0)
873: qinsert(p, &cc_q0a);
874: (*tt.tt_put_token)(p->code, p->string, p->length);
875: wwntokuse++;
876: wwntoksave += put_token_score(p->length);
877: i += p->length;
878: } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
879: p->code = p1->code;
880: p1->code = -1;
881: qinsert(p1, &cc_q1a);
882: if (--p->ccount == 0)
883: qinsert(p, &cc_q0a);
884: else
885: qinsert(p, &cc_q0b);
886: (*tt.tt_set_token)(p->code, p->string, p->length);
887: wwntokdef++;
888: wwntoksave -= tt.tt_set_token_cost;
889: i += p->length;
890: } else {
891: p->ccount--;
892: ttwrite(p->string, p->length);
893: wwntokbad++;
894: i += p->length;
895: }
896: }
897: wwntokc += bufsize;
898: }
899:
900: cc_token_compare(p1, p2)
901: struct cc **p1, **p2;
902: {
903: return (*p2)->bcount - (*p1)->bcount;
904: }