Annotation of src/usr.bin/window/compress.c, Revision 1.6
1.6 ! millert 1: /* $OpenBSD: compress.c,v 1.5 2001/11/19 19:02:18 mpech Exp $ */
1.1 deraadt 2: /* $NetBSD: compress.c,v 1.3 1995/09/28 10:34:13 tls Exp $ */
3:
4: /*
5: * Copyright (c) 1989, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This code is derived from software contributed to Berkeley by
9: * Edward Wang at The University of California, Berkeley.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
1.6 ! millert 19: * 3. Neither the name of the University nor the names of its contributors
1.1 deraadt 20: * may be used to endorse or promote products derived from this software
21: * without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33: * SUCH DAMAGE.
34: */
35:
36: #ifndef lint
37: #if 0
38: static char sccsid[] = "@(#)compress.c 8.1 (Berkeley) 6/6/93";
39: #else
1.6 ! millert 40: static char rcsid[] = "$OpenBSD: compress.c,v 1.5 2001/11/19 19:02:18 mpech Exp $";
1.1 deraadt 41: #endif
42: #endif /* not lint */
1.3 downsj 43:
44: #include <stdlib.h>
1.1 deraadt 45:
46: #include "ww.h"
47: #include "tt.h"
48:
49: /* special */
50: #include <stdio.h>
51: #include <fcntl.h>
52: int cc_trace = 0;
53: FILE *cc_trace_fp;
54:
55: /* tunable parameters */
56:
57: int cc_reverse = 1;
58: int cc_sort = 0;
59: int cc_chop = 0;
60:
61: int cc_token_max = 8; /* <= TOKEN_MAX */
62: int cc_token_min = 2; /* > tt.tt_put_token_cost */
63: int cc_npass0 = 1;
64: int cc_npass1 = 1;
65:
66: int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */
67:
68: int cc_ntoken = 8192;
69:
70: #define cc_weight XXX
71: #ifndef cc_weight
72: int cc_weight = 0;
73: #endif
74:
75: #define TOKEN_MAX 16
76:
77: struct cc {
78: char string[TOKEN_MAX];
79: char length;
80: char flag;
81: #ifndef cc_weight
82: short weight;
83: #endif
84: long time; /* time last seen */
85: short bcount; /* count in this buffer */
86: short ccount; /* count in compression */
87: short places; /* places in the buffer */
88: short code; /* token code */
89: struct cc *qforw, *qback;
90: struct cc *hforw, **hback;
91: };
92:
93: short cc_thresholds[TOKEN_MAX + 1];
94: #define thresh(length) (cc_thresholds[length])
95: #define threshp(code, count, length) \
96: ((code) >= 0 || (short) (count) >= cc_thresholds[length])
97:
98: #ifndef cc_weight
99: short cc_wthresholds[TOKEN_MAX + 1];
100: #define wthresh(length) (cc_wthresholds[length])
101: #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
102: #else
103: #define wthreshp(weight, length) (0)
104: #endif
105:
106: #ifndef cc_weight
107: short cc_wlimits[TOKEN_MAX + 1];
108: #define wlimit(length) (cc_wlimits[length])
109: #endif
110:
111: #define put_token_score(length) ((length) - tt.tt_put_token_cost)
112:
113: int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
114: #define score_adjust(score, p) \
115: do { \
116: int length = (p)->length; \
117: int ccount = (p)->ccount; \
118: if (threshp((p)->code, ccount, length) || \
119: wthreshp((p)->weight, length)) /* XXX */ \
120: (score) -= length - tt.tt_put_token_cost; \
121: else \
122: (score) += cc_score_adjustments[length][ccount]; \
123: } while (0)
124:
125: int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
126:
127: struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
128:
129: #define qinsert(p1, p2) \
130: do { \
1.5 mpech 131: struct cc *forw = (p1)->qforw; \
132: struct cc *back = (p1)->qback; \
1.1 deraadt 133: back->qforw = forw; \
134: forw->qback = back; \
135: forw = (p2)->qforw; \
136: (p1)->qforw = forw; \
137: forw->qback = (p1); \
138: (p2)->qforw = (p1); \
139: (p1)->qback = (p2); \
140: } while (0)
141:
142: #define qinsertq(q, p) \
143: ((q)->qforw == (q) ? 0 : \
144: ((q)->qback->qforw = (p)->qforw, \
145: (p)->qforw->qback = (q)->qback, \
146: (q)->qforw->qback = (p), \
147: (p)->qforw = (q)->qforw, \
148: (q)->qforw = (q), \
149: (q)->qback = (q)))
150:
151: #define H (14)
152: #define HSIZE (1 << H)
153: #define hash(h, c) ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
154:
155: char *cc_buffer;
156: struct cc **cc_output; /* the output array */
157: short *cc_places[TOKEN_MAX + 1];
158: short *cc_hashcodes; /* for computing hashcodes */
159: struct cc **cc_htab; /* the hash table */
160: struct cc **cc_tokens; /* holds all the active tokens */
161: struct cc_undo {
162: struct cc **pos;
163: struct cc *val;
164: } *cc_undo;
165:
166: long cc_time, cc_time0;
167:
168: char *cc_tt_ob, *cc_tt_obe;
169:
170: ccinit()
171: {
1.5 mpech 172: int i, j;
173: struct cc *p;
1.1 deraadt 174:
175: if (tt.tt_token_max > cc_token_max)
176: tt.tt_token_max = cc_token_max;
177: if (tt.tt_token_min < cc_token_min)
178: tt.tt_token_min = cc_token_min;
179: if (tt.tt_token_min > tt.tt_token_max) {
180: tt.tt_ntoken = 0;
181: return 0;
182: }
183: if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */
184: tt.tt_ntoken = cc_ntoken / 2;
185: #define C(x) (sizeof (x) / sizeof *(x))
186: for (i = 0; i < C(cc_thresholds); i++) {
187: int h = i - tt.tt_put_token_cost;
188: if (h > 0)
189: cc_thresholds[i] =
190: (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
191: else
192: cc_thresholds[i] = 0;
193: }
194: for (i = 0; i < C(cc_score_adjustments); i++) {
195: int t = cc_thresholds[i];
196: for (j = 0; j < C(*cc_score_adjustments); j++) {
197: if (j >= t)
198: cc_score_adjustments[i][j] =
199: - (i - tt.tt_put_token_cost);
200: else if (j < t - 1)
201: cc_score_adjustments[i][j] = 0;
202: else
203: /*
204: * cost now is
205: * length * (ccount + 1) a
206: * cost before was
207: * set-token-cost + length +
208: * ccount * put-token-cost b
209: * the score adjustment is (b - a)
210: */
211: cc_score_adjustments[i][j] =
212: tt.tt_set_token_cost + i +
213: j * tt.tt_put_token_cost -
214: i * (j + 1);
215: if (j >= t)
216: cc_initial_scores[i][j] = 0;
217: else
218: /*
219: * - (set-token-cost +
220: * (length - put-token-cost) -
221: * (length - put-token-cost) * ccount)
222: */
223: cc_initial_scores[i][j] =
224: - (tt.tt_set_token_cost +
225: (i - tt.tt_put_token_cost) -
226: (i - tt.tt_put_token_cost) * j);
227: }
228: }
229: #ifndef cc_weight
230: for (i = 1; i < C(cc_wthresholds); i++) {
231: cc_wthresholds[i] =
232: ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
233: i / 5 + 1) *
234: cc_weight + 1;
235: cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
236: }
237: #endif
238: #undef C
239: if ((cc_output = (struct cc **)
1.4 millert 240: malloc(cc_bufsize * sizeof *cc_output)) == 0)
1.1 deraadt 241: goto nomem;
242: if ((cc_hashcodes = (short *)
1.4 millert 243: malloc(cc_bufsize * sizeof *cc_hashcodes)) == 0)
1.1 deraadt 244: goto nomem;
245: if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
246: goto nomem;
247: if ((cc_tokens = (struct cc **)
1.4 millert 248: malloc((cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
1.1 deraadt 249: sizeof *cc_tokens)) == 0)
250: goto nomem;
251: if ((cc_undo = (struct cc_undo *)
1.4 millert 252: malloc(cc_bufsize * sizeof *cc_undo)) == 0)
1.1 deraadt 253: goto nomem;
254: for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
255: if ((cc_places[i] = (short *)
1.4 millert 256: malloc(cc_bufsize * sizeof **cc_places)) == 0)
1.1 deraadt 257: goto nomem;
258: cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
259: cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
260: cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
261: cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
1.4 millert 262: if ((p = (struct cc *) malloc(cc_ntoken * sizeof *p)) == 0)
1.1 deraadt 263: goto nomem;
264: for (i = 0; i < tt.tt_ntoken; i++) {
265: p->code = i;
266: p->time = -1;
267: p->qback = cc_q0a.qback;
268: p->qforw = &cc_q0a;
269: p->qback->qforw = p;
270: cc_q0a.qback = p;
271: p++;
272: }
273: for (; i < cc_ntoken; i++) {
274: p->code = -1;
275: p->time = -1;
276: p->qback = cc_q1a.qback;
277: p->qforw = &cc_q1a;
278: p->qback->qforw = p;
279: cc_q1a.qback = p;
280: p++;
281: }
282: cc_tt_ob = tt_ob;
283: cc_tt_obe = tt_obe;
1.4 millert 284: if ((cc_buffer = malloc(cc_bufsize)) == 0)
1.1 deraadt 285: goto nomem;
286: return 0;
287: nomem:
288: wwerrno = WWE_NOMEM;
289: return -1;
290: }
291:
292: ccstart()
293: {
294: int ccflush();
295:
296: ttflush();
297: tt_obp = tt_ob = cc_buffer;
298: tt_obe = tt_ob + cc_bufsize;
299: tt.tt_flush = ccflush;
300: if (cc_trace) {
301: cc_trace_fp = fopen("window-trace", "a");
302: (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
303: }
304: ccreset();
305: }
306:
307: ccreset()
308: {
1.5 mpech 309: struct cc *p;
1.1 deraadt 310:
311: bzero((char *) cc_htab, HSIZE * sizeof *cc_htab);
312: for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
313: p->hback = 0;
314: for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
315: p->hback = 0;
316: }
317:
318: ccend()
319: {
320:
321: ttflush();
322: tt_obp = tt_ob = cc_tt_ob;
323: tt_obe = cc_tt_obe;
324: tt.tt_flush = 0;
325: if (cc_trace_fp != NULL) {
326: (void) fclose(cc_trace_fp);
327: cc_trace_fp = NULL;
328: }
329: }
330:
331: ccflush()
332: {
333: int bufsize = tt_obp - tt_ob;
334: int n;
335:
336: if (tt_ob != cc_buffer)
337: abort();
338: if (cc_trace_fp != NULL) {
339: (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
340: (void) putc(-1, cc_trace_fp);
341: }
342: tt.tt_flush = 0;
343: (*tt.tt_compress)(1);
344: if (bufsize < tt.tt_token_min) {
345: ttflush();
346: goto out;
347: }
348: tt_obp = tt_ob = cc_tt_ob;
349: tt_obe = cc_tt_obe;
350: cc_time0 = cc_time;
351: cc_time += bufsize;
352: n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
353: cc_compress_phase(cc_output, bufsize, cc_tokens, n);
354: cc_output_phase(cc_buffer, cc_output, bufsize);
355: ttflush();
356: tt_obp = tt_ob = cc_buffer;
357: tt_obe = cc_buffer + cc_bufsize;
358: out:
359: (*tt.tt_compress)(0);
360: tt.tt_flush = ccflush;
361: }
362:
363: cc_sweep_phase(buffer, bufsize, tokens)
364: char *buffer;
365: struct cc **tokens;
366: {
1.5 mpech 367: struct cc **pp = tokens;
368: int i, n;
1.1 deraadt 369: #ifdef STATS
370: int nn, ii;
371: #endif
372:
373: #ifdef STATS
374: if (verbose >= 0)
375: time_begin();
376: if (verbose > 0)
377: printf("Sweep:");
378: #endif
379: cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
380: #ifdef STATS
381: ntoken_stat = 0;
382: nn = 0;
383: ii = 0;
384: #endif
385: for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
386: #ifdef STATS
387: if (verbose > 0) {
388: if (ii > 7) {
389: printf("\n ");
390: ii = 0;
391: }
392: ii++;
393: printf(" (%d", i);
394: (void) fflush(stdout);
395: }
396: #endif
397: n = cc_sweep(buffer, bufsize, pp, i);
398: pp += n;
399: #ifdef STATS
400: if (verbose > 0) {
401: if (--n > 0) {
402: printf(" %d", n);
403: nn += n;
404: }
405: putchar(')');
406: }
407: #endif
408: }
409: qinsertq(&cc_q1b, &cc_q1a);
410: #ifdef STATS
411: if (verbose > 0)
412: printf("\n %d tokens, %d candidates\n",
413: ntoken_stat, nn);
414: if (verbose >= 0)
415: time_end();
416: #endif
417: return pp - tokens;
418: }
419:
420: cc_sweep0(buffer, n, length)
421: char *buffer;
422: {
1.5 mpech 423: char *p;
424: short *hc;
425: int i;
426: short c;
427: short pc = tt.tt_padc;
1.1 deraadt 428:
429: /* n and length are at least 1 */
430: p = buffer++;
431: hc = cc_hashcodes;
432: i = n;
433: do {
434: if ((*hc++ = *p++) == pc)
435: hc[-1] = -1;
436: } while (--i);
437: while (--length) {
438: p = buffer++;
439: hc = cc_hashcodes;
440: for (i = n--; --i;) {
441: if ((c = *p++) == pc || *hc < 0)
442: c = -1;
443: else
444: c = hash(*hc, c);
445: *hc++ = c;
446: }
447: }
448: }
449:
450: cc_sweep(buffer, bufsize, tokens, length)
451: char *buffer;
452: struct cc **tokens;
1.5 mpech 453: int length;
1.1 deraadt 454: {
1.5 mpech 455: struct cc *p;
456: char *cp;
457: int i;
1.1 deraadt 458: short *hc;
459: short *places = cc_places[length];
460: struct cc **pp = tokens;
461: short threshold = thresh(length);
462: #ifndef cc_weight
463: short wthreshold = wthresh(length);
464: short limit = wlimit(length);
465: #endif
466: int time;
467: short pc = tt.tt_padc;
468:
469: i = length - 1;
470: bufsize -= i;
471: cp = buffer + i;
472: hc = cc_hashcodes;
473: time = cc_time0;
474: for (i = 0; i < bufsize; i++, time++) {
475: struct cc **h;
476:
477: {
1.5 mpech 478: short *hc1 = hc;
479: short c = *cp++;
480: short hh;
1.1 deraadt 481: if ((hh = *hc1) < 0 || c == pc) {
482: *hc1++ = -1;
483: hc = hc1;
484: continue;
485: }
486: h = cc_htab + (*hc1++ = hash(hh, c));
487: hc = hc1;
488: }
489: for (p = *h; p != 0; p = p->hforw)
490: if (p->length == (char) length) {
1.5 mpech 491: char *p1 = p->string;
492: char *p2 = cp - length;
493: int n = length;
1.1 deraadt 494: do
495: if (*p1++ != *p2++)
496: goto fail;
497: while (--n);
498: break;
499: fail:;
500: }
501: if (p == 0) {
502: p = cc_q1a.qback;
503: if (p == &cc_q1a ||
504: p->time >= cc_time0 && p->length == (char) length)
505: continue;
506: if (p->hback != 0)
507: if ((*p->hback = p->hforw) != 0)
508: p->hforw->hback = p->hback;
509: {
1.5 mpech 510: char *p1 = p->string;
511: char *p2 = cp - length;
512: int n = length;
1.1 deraadt 513: do
514: *p1++ = *p2++;
515: while (--n);
516: }
517: p->length = length;
518: #ifndef cc_weight
519: p->weight = cc_weight;
520: #endif
521: p->time = time;
522: p->bcount = 1;
523: p->ccount = 0;
524: p->flag = 0;
525: if ((p->hforw = *h) != 0)
526: p->hforw->hback = &p->hforw;
527: *h = p;
528: p->hback = h;
529: qinsert(p, &cc_q1a);
530: places[i] = -1;
531: p->places = i;
532: #ifdef STATS
533: ntoken_stat++;
534: #endif
535: } else if (p->time < cc_time0) {
536: #ifndef cc_weight
537: if ((p->weight += p->time - time) < 0)
538: p->weight = cc_weight;
539: else if ((p->weight += cc_weight) > limit)
540: p->weight = limit;
541: #endif
542: p->time = time;
543: p->bcount = 1;
544: p->ccount = 0;
545: if (p->code >= 0) {
546: p->flag = 1;
547: *pp++ = p;
548: } else
549: #ifndef cc_weight
550: if (p->weight >= wthreshold) {
551: p->flag = 1;
552: *pp++ = p;
553: qinsert(p, &cc_q1b);
554: } else
555: #endif
556: {
557: p->flag = 0;
558: qinsert(p, &cc_q1a);
559: }
560: places[i] = -1;
561: p->places = i;
562: #ifdef STATS
563: ntoken_stat++;
564: #endif
565: } else if (p->time + length > time) {
566: /*
567: * overlapping token, don't count as two and
568: * don't update time, but do adjust weight to offset
569: * the difference
570: */
571: #ifndef cc_weight
572: if (cc_weight != 0) { /* XXX */
573: p->weight += time - p->time;
574: if (!p->flag && p->weight >= wthreshold) {
575: p->flag = 1;
576: *pp++ = p;
577: qinsert(p, &cc_q1b);
578: }
579: }
580: #endif
581: places[i] = p->places;
582: p->places = i;
583: } else {
584: #ifndef cc_weight
585: if ((p->weight += p->time - time) < 0)
586: p->weight = cc_weight;
587: else if ((p->weight += cc_weight) > limit)
588: p->weight = limit;
589: #endif
590: p->time = time;
591: p->bcount++;
592: if (!p->flag &&
593: /* code must be < 0 if flag false here */
594: (p->bcount >= threshold
595: #ifndef cc_weight
596: || p->weight >= wthreshold
597: #endif
598: )) {
599: p->flag = 1;
600: *pp++ = p;
601: qinsert(p, &cc_q1b);
602: }
603: places[i] = p->places;
604: p->places = i;
605: }
606: }
607: if ((i = pp - tokens) > 0) {
608: *pp = 0;
609: if (cc_reverse)
610: cc_sweep_reverse(tokens, places);
611: if (cc_sort && i > 1) {
612: int cc_token_compare();
613: qsort((char *) tokens, i, sizeof *tokens,
614: cc_token_compare);
615: }
616: if (cc_chop) {
617: if ((i = i * cc_chop / 100) == 0)
618: i = 1;
619: tokens[i] = 0;
620: }
621: i++;
622: }
623: return i;
624: }
625:
626: cc_sweep_reverse(pp, places)
1.5 mpech 627: struct cc **pp;
628: short *places;
1.1 deraadt 629: {
1.5 mpech 630: struct cc *p;
631: short front, back, t;
1.1 deraadt 632:
633: while ((p = *pp++) != 0) {
634: back = -1;
635: t = p->places;
636: /* the list is never empty */
637: do {
638: front = places[t];
639: places[t] = back;
640: back = t;
641: } while ((t = front) >= 0);
642: p->places = back;
643: }
644: }
645:
646: cc_compress_phase(output, bufsize, tokens, ntoken)
647: struct cc **output;
648: struct cc **tokens;
649: {
1.5 mpech 650: int i;
1.1 deraadt 651:
652: bzero((char *) output, bufsize * sizeof *output);
653: for (i = 0; i < cc_npass0; i++)
654: cc_compress_phase1(output, tokens, ntoken, 0);
655: for (i = 0; i < cc_npass1; i++)
656: cc_compress_phase1(output, tokens, ntoken, 1);
657: cc_compress_cleanup(output, bufsize);
658: }
659:
660: cc_compress_phase1(output, tokens, ntoken, flag)
1.5 mpech 661: struct cc **output;
1.1 deraadt 662: struct cc **tokens;
663: {
1.5 mpech 664: struct cc **pp;
1.1 deraadt 665: #ifdef STATS
1.5 mpech 666: int i = 0;
1.1 deraadt 667: int nt = 0, cc = 0, nc = 0;
668: #endif
669:
670: #ifdef STATS
671: if (verbose >= 0)
672: time_begin();
673: if (verbose > 0)
674: printf("Compress:");
675: #endif
676: pp = tokens;
677: while (pp < tokens + ntoken) {
678: #ifdef STATS
679: if (verbose > 0) {
680: ntoken_stat = 0;
681: ccount_stat = 0;
682: ncover_stat = 0;
683: if (i > 2) {
684: printf("\n ");
685: i = 0;
686: }
687: i++;
688: printf(" (%d", (*pp)->length);
689: (void) fflush(stdout);
690: }
691: #endif
692: pp += cc_compress(output, pp, flag);
693: #ifdef STATS
694: if (verbose > 0) {
695: printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
696: ncover_stat);
697: nt += ntoken_stat;
698: cc += ccount_stat;
699: nc += ncover_stat;
700: }
701: #endif
702: }
703: #ifdef STATS
704: if (verbose > 0)
705: printf("\n total: (%dt %du %dc)\n", nt, cc, nc);
706: if (verbose >= 0)
707: time_end();
708: #endif
709: }
710:
711: cc_compress_cleanup(output, bufsize)
1.5 mpech 712: struct cc **output;
1.1 deraadt 713: {
1.5 mpech 714: struct cc **end;
1.1 deraadt 715:
716: /* the previous output phase may have been interrupted */
717: qinsertq(&cc_q0b, &cc_q0a);
718: for (end = output + bufsize; output < end;) {
1.5 mpech 719: struct cc *p;
720: int length;
1.1 deraadt 721: if ((p = *output) == 0) {
722: output++;
723: continue;
724: }
725: length = p->length;
726: if (!p->flag) {
727: } else if (p->code >= 0) {
728: qinsert(p, &cc_q0b);
729: p->flag = 0;
730: } else if (p->ccount == 0) {
731: *output = 0;
732: } else if (p->ccount >= thresh(length)
733: #ifndef cc_weight
734: || wthreshp(p->weight, length)
735: #endif
736: ) {
737: p->flag = 0;
738: } else {
739: p->ccount = 0;
740: *output = 0;
741: }
742: output += length;
743: }
744: }
745:
746: cc_compress(output, tokens, flag)
747: struct cc **output;
748: struct cc **tokens;
749: char flag;
750: {
751: struct cc **pp = tokens;
1.5 mpech 752: struct cc *p = *pp++;
1.1 deraadt 753: int length = p->length;
754: int threshold = thresh(length);
755: #ifndef cc_weight
756: short wthreshold = wthresh(length);
757: #endif
758: short *places = cc_places[length];
759: int *initial_scores = cc_initial_scores[length];
760: int initial_score0 = put_token_score(length);
761:
762: do {
763: int score;
1.5 mpech 764: struct cc_undo *undop;
1.1 deraadt 765: int ccount;
766: #ifdef STATS
767: int ncover;
768: #endif
769: int i;
770:
771: ccount = p->ccount;
772: if ((short) ccount >= p->bcount)
773: continue;
774: if (p->code >= 0 || ccount >= threshold)
775: score = 0;
776: #ifndef cc_weight
777: else if (p->weight >= wthreshold)
778: /* allow one fewer match than normal */
779: /* XXX, should adjust for ccount */
780: score = - tt.tt_set_token_cost;
781: #endif
782: else
783: score = initial_scores[ccount];
784: undop = cc_undo;
785: #ifdef STATS
786: ncover = 0;
787: #endif
788: for (i = p->places; i >= 0; i = places[i]) {
1.5 mpech 789: struct cc **jp;
790: struct cc *x;
791: struct cc **ip = output + i;
792: int score0 = initial_score0;
1.1 deraadt 793: struct cc **iip = ip + length;
794: struct cc_undo *undop1 = undop;
795:
796: if ((x = *(jp = ip)) != 0)
797: goto z;
798: while (--jp >= output)
799: if ((x = *jp) != 0) {
800: if (jp + x->length > ip)
801: goto z;
802: break;
803: }
804: jp = ip + 1;
805: while (jp < iip) {
806: if ((x = *jp) == 0) {
807: jp++;
808: continue;
809: }
810: z:
811: if (x == p)
812: goto undo;
813: #ifdef STATS
814: ncover++;
815: #endif
816: undop->pos = jp;
817: undop->val = x;
818: undop++;
819: *jp = 0;
820: x->ccount--;
821: score_adjust(score0, x);
822: if (score0 < 0 && flag)
823: goto undo;
824: jp += x->length;
825: }
826: undop->pos = ip;
827: undop->val = 0;
828: undop++;
829: *ip = p;
830: ccount++;
831: score += score0;
832: continue;
833: undo:
834: while (--undop >= undop1)
835: if (*undop->pos = x = undop->val)
836: x->ccount++;
837: undop++;
838: }
839: if (score > 0) {
840: #ifdef STATS
841: ccount_stat += ccount - p->ccount;
842: ntoken_stat++;
843: ncover_stat += ncover;
844: #endif
845: p->ccount = ccount;
846: } else {
1.5 mpech 847: struct cc_undo *u = cc_undo;
1.1 deraadt 848: while (--undop >= u) {
1.5 mpech 849: struct cc *x;
1.1 deraadt 850: if (*undop->pos = x = undop->val)
851: x->ccount++;
852: }
853: }
854: } while ((p = *pp++) != 0);
855: return pp - tokens;
856: }
857:
858: cc_output_phase(buffer, output, bufsize)
1.5 mpech 859: char *buffer;
860: struct cc **output;
861: int bufsize;
1.1 deraadt 862: {
1.5 mpech 863: int i;
864: struct cc *p, *p1;
1.1 deraadt 865:
866: for (i = 0; i < bufsize;) {
867: if ((p = output[i]) == 0) {
868: ttputc(buffer[i]);
869: i++;
870: } else if (p->code >= 0) {
871: if (--p->ccount == 0)
872: qinsert(p, &cc_q0a);
873: (*tt.tt_put_token)(p->code, p->string, p->length);
874: wwntokuse++;
875: wwntoksave += put_token_score(p->length);
876: i += p->length;
877: } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
878: p->code = p1->code;
879: p1->code = -1;
880: qinsert(p1, &cc_q1a);
881: if (--p->ccount == 0)
882: qinsert(p, &cc_q0a);
883: else
884: qinsert(p, &cc_q0b);
885: (*tt.tt_set_token)(p->code, p->string, p->length);
886: wwntokdef++;
887: wwntoksave -= tt.tt_set_token_cost;
888: i += p->length;
889: } else {
890: p->ccount--;
891: ttwrite(p->string, p->length);
892: wwntokbad++;
893: i += p->length;
894: }
895: }
896: wwntokc += bufsize;
897: }
898:
899: cc_token_compare(p1, p2)
900: struct cc **p1, **p2;
901: {
902: return (*p2)->bcount - (*p1)->bcount;
903: }