src/usr.bin/window/compress.c - annotate

Return to compress.c CVS log
Up to [local] / src / usr.bin / window
Annotation of src/usr.bin/window/compress.c, Revision 1.5

1.5     ! mpech       1: /*     $OpenBSD: compress.c,v 1.4 1998/04/26 22:49:04 millert Exp $    */
1.1       deraadt     2: /*     $NetBSD: compress.c,v 1.3 1995/09/28 10:34:13 tls Exp $ */
                      3:
                      4: /*
                      5:  * Copyright (c) 1989, 1993
                      6:  *     The Regents of the University of California.  All rights reserved.
                      7:  *
                      8:  * This code is derived from software contributed to Berkeley by
                      9:  * Edward Wang at The University of California, Berkeley.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by the University of
                     22:  *     California, Berkeley and its contributors.
                     23:  * 4. Neither the name of the University nor the names of its contributors
                     24:  *    may be used to endorse or promote products derived from this software
                     25:  *    without specific prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     28:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     29:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     30:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     31:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     32:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     33:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     34:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     35:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     36:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     37:  * SUCH DAMAGE.
                     38:  */
                     39:
                     40: #ifndef lint
                     41: #if 0
                     42: static char sccsid[] = "@(#)compress.c 8.1 (Berkeley) 6/6/93";
                     43: #else
1.5     ! mpech      44: static char rcsid[] = "$OpenBSD: compress.c,v 1.4 1998/04/26 22:49:04 millert Exp $";
1.1       deraadt    45: #endif
                     46: #endif /* not lint */
1.3       downsj     47:
                     48: #include <stdlib.h>
1.1       deraadt    49:
                     50: #include "ww.h"
                     51: #include "tt.h"
                     52:
                     53:        /* special */
                     54: #include <stdio.h>
                     55: #include <fcntl.h>
                     56: int cc_trace = 0;
                     57: FILE *cc_trace_fp;
                     58:
                     59:        /* tunable parameters */
                     60:
                     61: int cc_reverse = 1;
                     62: int cc_sort = 0;
                     63: int cc_chop = 0;
                     64:
                     65: int cc_token_max = 8;          /* <= TOKEN_MAX */
                     66: int cc_token_min = 2;          /* > tt.tt_put_token_cost */
                     67: int cc_npass0 = 1;
                     68: int cc_npass1 = 1;
                     69:
                     70: int cc_bufsize = 1024 * 3;     /* XXX, or 80 * 24 * 2 */
                     71:
                     72: int cc_ntoken = 8192;
                     73:
                     74: #define cc_weight XXX
                     75: #ifndef cc_weight
                     76: int cc_weight = 0;
                     77: #endif
                     78:
                     79: #define TOKEN_MAX 16
                     80:
                     81: struct cc {
                     82:        char string[TOKEN_MAX];
                     83:        char length;
                     84:        char flag;
                     85: #ifndef cc_weight
                     86:        short weight;
                     87: #endif
                     88:        long time;              /* time last seen */
                     89:        short bcount;           /* count in this buffer */
                     90:        short ccount;           /* count in compression */
                     91:        short places;           /* places in the buffer */
                     92:        short code;             /* token code */
                     93:        struct cc *qforw, *qback;
                     94:        struct cc *hforw, **hback;
                     95: };
                     96:
                     97: short cc_thresholds[TOKEN_MAX + 1];
                     98: #define thresh(length) (cc_thresholds[length])
                     99: #define threshp(code, count, length) \
                    100:        ((code) >= 0 || (short) (count) >= cc_thresholds[length])
                    101:
                    102: #ifndef cc_weight
                    103: short cc_wthresholds[TOKEN_MAX + 1];
                    104: #define wthresh(length) (cc_wthresholds[length])
                    105: #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
                    106: #else
                    107: #define wthreshp(weight, length) (0)
                    108: #endif
                    109:
                    110: #ifndef cc_weight
                    111: short cc_wlimits[TOKEN_MAX + 1];
                    112: #define wlimit(length) (cc_wlimits[length])
                    113: #endif
                    114:
                    115: #define put_token_score(length) ((length) - tt.tt_put_token_cost)
                    116:
                    117: int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
                    118: #define score_adjust(score, p) \
                    119:        do { \
                    120:                int length = (p)->length; \
                    121:                int ccount = (p)->ccount; \
                    122:                if (threshp((p)->code, ccount, length) || \
                    123:                    wthreshp((p)->weight, length)) /* XXX */ \
                    124:                        (score) -= length - tt.tt_put_token_cost; \
                    125:                else \
                    126:                        (score) += cc_score_adjustments[length][ccount]; \
                    127:        } while (0)
                    128:
                    129: int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
                    130:
                    131: struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
                    132:
                    133: #define qinsert(p1, p2) \
                    134:        do { \
1.5     ! mpech     135:                struct cc *forw = (p1)->qforw; \
        !           136:                struct cc *back = (p1)->qback; \
1.1       deraadt   137:                back->qforw = forw; \
                    138:                forw->qback = back; \
                    139:                forw = (p2)->qforw; \
                    140:                (p1)->qforw = forw; \
                    141:                forw->qback = (p1); \
                    142:                (p2)->qforw = (p1); \
                    143:                (p1)->qback = (p2); \
                    144:        } while (0)
                    145:
                    146: #define qinsertq(q, p) \
                    147:        ((q)->qforw == (q) ? 0 : \
                    148:         ((q)->qback->qforw = (p)->qforw, \
                    149:          (p)->qforw->qback = (q)->qback, \
                    150:          (q)->qforw->qback = (p), \
                    151:          (p)->qforw = (q)->qforw, \
                    152:          (q)->qforw = (q), \
                    153:          (q)->qback = (q)))
                    154:
                    155: #define H              (14)
                    156: #define HSIZE          (1 << H)
                    157: #define hash(h, c)     ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
                    158:
                    159: char *cc_buffer;
                    160: struct cc **cc_output;                 /* the output array */
                    161: short *cc_places[TOKEN_MAX + 1];
                    162: short *cc_hashcodes;                   /* for computing hashcodes */
                    163: struct cc **cc_htab;                   /* the hash table */
                    164: struct cc **cc_tokens;                 /* holds all the active tokens */
                    165: struct cc_undo {
                    166:        struct cc **pos;
                    167:        struct cc *val;
                    168: } *cc_undo;
                    169:
                    170: long cc_time, cc_time0;
                    171:
                    172: char *cc_tt_ob, *cc_tt_obe;
                    173:
                    174: ccinit()
                    175: {
1.5     ! mpech     176:        int i, j;
        !           177:        struct cc *p;
1.1       deraadt   178:
                    179:        if (tt.tt_token_max > cc_token_max)
                    180:                tt.tt_token_max = cc_token_max;
                    181:        if (tt.tt_token_min < cc_token_min)
                    182:                tt.tt_token_min = cc_token_min;
                    183:        if (tt.tt_token_min > tt.tt_token_max) {
                    184:                tt.tt_ntoken = 0;
                    185:                return 0;
                    186:        }
                    187:        if (tt.tt_ntoken > cc_ntoken / 2)       /* not likely */
                    188:                tt.tt_ntoken = cc_ntoken / 2;
                    189: #define C(x) (sizeof (x) / sizeof *(x))
                    190:        for (i = 0; i < C(cc_thresholds); i++) {
                    191:                int h = i - tt.tt_put_token_cost;
                    192:                if (h > 0)
                    193:                        cc_thresholds[i] =
                    194:                                (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
                    195:                else
                    196:                        cc_thresholds[i] = 0;
                    197:        }
                    198:        for (i = 0; i < C(cc_score_adjustments); i++) {
                    199:                int t = cc_thresholds[i];
                    200:                for (j = 0; j < C(*cc_score_adjustments); j++) {
                    201:                        if (j >= t)
                    202:                                cc_score_adjustments[i][j] =
                    203:                                        - (i - tt.tt_put_token_cost);
                    204:                        else if (j < t - 1)
                    205:                                cc_score_adjustments[i][j] = 0;
                    206:                        else
                    207:                                /*
                    208:                                 * cost now is
                    209:                                 *      length * (ccount + 1)           a
                    210:                                 * cost before was
                    211:                                 *      set-token-cost + length +
                    212:                                 *              ccount * put-token-cost b
                    213:                                 * the score adjustment is (b - a)
                    214:                                 */
                    215:                                cc_score_adjustments[i][j] =
                    216:                                        tt.tt_set_token_cost + i +
                    217:                                                j * tt.tt_put_token_cost -
                    218:                                                        i * (j + 1);
                    219:                        if (j >= t)
                    220:                                cc_initial_scores[i][j] = 0;
                    221:                        else
                    222:                                /*
                    223:                                 * - (set-token-cost +
                    224:                                 *      (length - put-token-cost) -
                    225:                                 *      (length - put-token-cost) * ccount)
                    226:                                 */
                    227:                                cc_initial_scores[i][j] =
                    228:                                        - (tt.tt_set_token_cost +
                    229:                                           (i - tt.tt_put_token_cost) -
                    230:                                           (i - tt.tt_put_token_cost) * j);
                    231:                }
                    232:        }
                    233: #ifndef cc_weight
                    234:        for (i = 1; i < C(cc_wthresholds); i++) {
                    235:                cc_wthresholds[i] =
                    236:                        ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
                    237:                                i / 5 + 1) *
                    238:                                cc_weight + 1;
                    239:                cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
                    240:        }
                    241: #endif
                    242: #undef C
                    243:        if ((cc_output = (struct cc **)
1.4       millert   244:             malloc(cc_bufsize * sizeof *cc_output)) == 0)
1.1       deraadt   245:                goto nomem;
                    246:        if ((cc_hashcodes = (short *)
1.4       millert   247:             malloc(cc_bufsize * sizeof *cc_hashcodes)) == 0)
1.1       deraadt   248:                goto nomem;
                    249:        if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
                    250:                goto nomem;
                    251:        if ((cc_tokens = (struct cc **)
1.4       millert   252:             malloc((cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
1.1       deraadt   253:                    sizeof *cc_tokens)) == 0)
                    254:                goto nomem;
                    255:        if ((cc_undo = (struct cc_undo *)
1.4       millert   256:             malloc(cc_bufsize * sizeof *cc_undo)) == 0)
1.1       deraadt   257:                goto nomem;
                    258:        for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
                    259:                if ((cc_places[i] = (short *)
1.4       millert   260:                     malloc(cc_bufsize * sizeof **cc_places)) == 0)
1.1       deraadt   261:                        goto nomem;
                    262:        cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
                    263:        cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
                    264:        cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
                    265:        cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
1.4       millert   266:        if ((p = (struct cc *) malloc(cc_ntoken * sizeof *p)) == 0)
1.1       deraadt   267:                goto nomem;
                    268:        for (i = 0; i < tt.tt_ntoken; i++) {
                    269:                p->code = i;
                    270:                p->time = -1;
                    271:                p->qback = cc_q0a.qback;
                    272:                p->qforw = &cc_q0a;
                    273:                p->qback->qforw = p;
                    274:                cc_q0a.qback = p;
                    275:                p++;
                    276:        }
                    277:        for (; i < cc_ntoken; i++) {
                    278:                p->code = -1;
                    279:                p->time = -1;
                    280:                p->qback = cc_q1a.qback;
                    281:                p->qforw = &cc_q1a;
                    282:                p->qback->qforw = p;
                    283:                cc_q1a.qback = p;
                    284:                p++;
                    285:        }
                    286:        cc_tt_ob = tt_ob;
                    287:        cc_tt_obe = tt_obe;
1.4       millert   288:        if ((cc_buffer = malloc(cc_bufsize)) == 0)
1.1       deraadt   289:                goto nomem;
                    290:        return 0;
                    291: nomem:
                    292:        wwerrno = WWE_NOMEM;
                    293:        return -1;
                    294: }
                    295:
                    296: ccstart()
                    297: {
                    298:        int ccflush();
                    299:
                    300:        ttflush();
                    301:        tt_obp = tt_ob = cc_buffer;
                    302:        tt_obe = tt_ob + cc_bufsize;
                    303:        tt.tt_flush = ccflush;
                    304:        if (cc_trace) {
                    305:                cc_trace_fp = fopen("window-trace", "a");
                    306:                (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
                    307:        }
                    308:        ccreset();
                    309: }
                    310:
                    311: ccreset()
                    312: {
1.5     ! mpech     313:        struct cc *p;
1.1       deraadt   314:
                    315:        bzero((char *) cc_htab, HSIZE * sizeof *cc_htab);
                    316:        for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
                    317:                p->hback = 0;
                    318:        for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
                    319:                p->hback = 0;
                    320: }
                    321:
                    322: ccend()
                    323: {
                    324:
                    325:        ttflush();
                    326:        tt_obp = tt_ob = cc_tt_ob;
                    327:        tt_obe = cc_tt_obe;
                    328:        tt.tt_flush = 0;
                    329:        if (cc_trace_fp != NULL) {
                    330:                (void) fclose(cc_trace_fp);
                    331:                cc_trace_fp = NULL;
                    332:        }
                    333: }
                    334:
                    335: ccflush()
                    336: {
                    337:        int bufsize = tt_obp - tt_ob;
                    338:        int n;
                    339:
                    340:        if (tt_ob != cc_buffer)
                    341:                abort();
                    342:        if (cc_trace_fp != NULL) {
                    343:                (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
                    344:                (void) putc(-1, cc_trace_fp);
                    345:        }
                    346:        tt.tt_flush = 0;
                    347:        (*tt.tt_compress)(1);
                    348:        if (bufsize < tt.tt_token_min) {
                    349:                ttflush();
                    350:                goto out;
                    351:        }
                    352:        tt_obp = tt_ob = cc_tt_ob;
                    353:        tt_obe = cc_tt_obe;
                    354:        cc_time0 = cc_time;
                    355:        cc_time += bufsize;
                    356:        n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
                    357:        cc_compress_phase(cc_output, bufsize, cc_tokens, n);
                    358:        cc_output_phase(cc_buffer, cc_output, bufsize);
                    359:        ttflush();
                    360:        tt_obp = tt_ob = cc_buffer;
                    361:        tt_obe = cc_buffer + cc_bufsize;
                    362: out:
                    363:        (*tt.tt_compress)(0);
                    364:        tt.tt_flush = ccflush;
                    365: }
                    366:
                    367: cc_sweep_phase(buffer, bufsize, tokens)
                    368:        char *buffer;
                    369:        struct cc **tokens;
                    370: {
1.5     ! mpech     371:        struct cc **pp = tokens;
        !           372:        int i, n;
1.1       deraadt   373: #ifdef STATS
                    374:        int nn, ii;
                    375: #endif
                    376:
                    377: #ifdef STATS
                    378:        if (verbose >= 0)
                    379:                time_begin();
                    380:        if (verbose > 0)
                    381:                printf("Sweep:");
                    382: #endif
                    383:        cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
                    384: #ifdef STATS
                    385:        ntoken_stat = 0;
                    386:        nn = 0;
                    387:        ii = 0;
                    388: #endif
                    389:        for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
                    390: #ifdef STATS
                    391:                if (verbose > 0) {
                    392:                        if (ii > 7) {
                    393:                                printf("\n      ");
                    394:                                ii = 0;
                    395:                        }
                    396:                        ii++;
                    397:                        printf(" (%d", i);
                    398:                        (void) fflush(stdout);
                    399:                }
                    400: #endif
                    401:                n = cc_sweep(buffer, bufsize, pp, i);
                    402:                pp += n;
                    403: #ifdef STATS
                    404:                if (verbose > 0) {
                    405:                        if (--n > 0) {
                    406:                                printf(" %d", n);
                    407:                                nn += n;
                    408:                        }
                    409:                        putchar(')');
                    410:                }
                    411: #endif
                    412:        }
                    413:        qinsertq(&cc_q1b, &cc_q1a);
                    414: #ifdef STATS
                    415:        if (verbose > 0)
                    416:                printf("\n       %d tokens, %d candidates\n",
                    417:                        ntoken_stat, nn);
                    418:        if (verbose >= 0)
                    419:                time_end();
                    420: #endif
                    421:        return pp - tokens;
                    422: }
                    423:
                    424: cc_sweep0(buffer, n, length)
                    425:        char *buffer;
                    426: {
1.5     ! mpech     427:        char *p;
        !           428:        short *hc;
        !           429:        int i;
        !           430:        short c;
        !           431:        short pc = tt.tt_padc;
1.1       deraadt   432:
                    433:        /* n and length are at least 1 */
                    434:        p = buffer++;
                    435:        hc = cc_hashcodes;
                    436:        i = n;
                    437:        do {
                    438:                if ((*hc++ = *p++) == pc)
                    439:                        hc[-1] = -1;
                    440:        } while (--i);
                    441:        while (--length) {
                    442:                p = buffer++;
                    443:                hc = cc_hashcodes;
                    444:                for (i = n--; --i;) {
                    445:                        if ((c = *p++) == pc || *hc < 0)
                    446:                                c = -1;
                    447:                        else
                    448:                                c = hash(*hc, c);
                    449:                        *hc++ = c;
                    450:                }
                    451:        }
                    452: }
                    453:
                    454: cc_sweep(buffer, bufsize, tokens, length)
                    455:        char *buffer;
                    456:        struct cc **tokens;
1.5     ! mpech     457:        int length;
1.1       deraadt   458: {
1.5     ! mpech     459:        struct cc *p;
        !           460:        char *cp;
        !           461:        int i;
1.1       deraadt   462:        short *hc;
                    463:        short *places = cc_places[length];
                    464:        struct cc **pp = tokens;
                    465:        short threshold = thresh(length);
                    466: #ifndef cc_weight
                    467:        short wthreshold = wthresh(length);
                    468:        short limit = wlimit(length);
                    469: #endif
                    470:        int time;
                    471:        short pc = tt.tt_padc;
                    472:
                    473:        i = length - 1;
                    474:        bufsize -= i;
                    475:        cp = buffer + i;
                    476:        hc = cc_hashcodes;
                    477:        time = cc_time0;
                    478:        for (i = 0; i < bufsize; i++, time++) {
                    479:                struct cc **h;
                    480:
                    481:                {
1.5     ! mpech     482:                        short *hc1 = hc;
        !           483:                        short c = *cp++;
        !           484:                        short hh;
1.1       deraadt   485:                        if ((hh = *hc1) < 0 || c == pc) {
                    486:                                *hc1++ = -1;
                    487:                                hc = hc1;
                    488:                                continue;
                    489:                        }
                    490:                        h = cc_htab + (*hc1++ = hash(hh, c));
                    491:                        hc = hc1;
                    492:                }
                    493:                for (p = *h; p != 0; p = p->hforw)
                    494:                        if (p->length == (char) length) {
1.5     ! mpech     495:                                char *p1 = p->string;
        !           496:                                char *p2 = cp - length;
        !           497:                                int n = length;
1.1       deraadt   498:                                do
                    499:                                        if (*p1++ != *p2++)
                    500:                                                goto fail;
                    501:                                while (--n);
                    502:                                break;
                    503:                        fail:;
                    504:                        }
                    505:                if (p == 0) {
                    506:                        p = cc_q1a.qback;
                    507:                        if (p == &cc_q1a ||
                    508:                            p->time >= cc_time0 && p->length == (char) length)
                    509:                                continue;
                    510:                        if (p->hback != 0)
                    511:                                if ((*p->hback = p->hforw) != 0)
                    512:                                        p->hforw->hback = p->hback;
                    513:                        {
1.5     ! mpech     514:                                char *p1 = p->string;
        !           515:                                char *p2 = cp - length;
        !           516:                                int n = length;
1.1       deraadt   517:                                do
                    518:                                        *p1++ = *p2++;
                    519:                                while (--n);
                    520:                        }
                    521:                        p->length = length;
                    522: #ifndef cc_weight
                    523:                        p->weight = cc_weight;
                    524: #endif
                    525:                        p->time = time;
                    526:                        p->bcount = 1;
                    527:                        p->ccount = 0;
                    528:                        p->flag = 0;
                    529:                        if ((p->hforw = *h) != 0)
                    530:                                p->hforw->hback = &p->hforw;
                    531:                        *h = p;
                    532:                        p->hback = h;
                    533:                        qinsert(p, &cc_q1a);
                    534:                        places[i] = -1;
                    535:                        p->places = i;
                    536: #ifdef STATS
                    537:                        ntoken_stat++;
                    538: #endif
                    539:                } else if (p->time < cc_time0) {
                    540: #ifndef cc_weight
                    541:                        if ((p->weight += p->time - time) < 0)
                    542:                                p->weight = cc_weight;
                    543:                        else if ((p->weight += cc_weight) > limit)
                    544:                                p->weight = limit;
                    545: #endif
                    546:                        p->time = time;
                    547:                        p->bcount = 1;
                    548:                        p->ccount = 0;
                    549:                        if (p->code >= 0) {
                    550:                                p->flag = 1;
                    551:                                *pp++ = p;
                    552:                        } else
                    553: #ifndef cc_weight
                    554:                        if (p->weight >= wthreshold) {
                    555:                                p->flag = 1;
                    556:                                *pp++ = p;
                    557:                                qinsert(p, &cc_q1b);
                    558:                        } else
                    559: #endif
                    560:                        {
                    561:                                p->flag = 0;
                    562:                                qinsert(p, &cc_q1a);
                    563:                        }
                    564:                        places[i] = -1;
                    565:                        p->places = i;
                    566: #ifdef STATS
                    567:                        ntoken_stat++;
                    568: #endif
                    569:                } else if (p->time + length > time) {
                    570:                        /*
                    571:                         * overlapping token, don't count as two and
                    572:                         * don't update time, but do adjust weight to offset
                    573:                         * the difference
                    574:                         */
                    575: #ifndef cc_weight
                    576:                        if (cc_weight != 0) {   /* XXX */
                    577:                                p->weight += time - p->time;
                    578:                                if (!p->flag && p->weight >= wthreshold) {
                    579:                                        p->flag = 1;
                    580:                                        *pp++ = p;
                    581:                                        qinsert(p, &cc_q1b);
                    582:                                }
                    583:                        }
                    584: #endif
                    585:                        places[i] = p->places;
                    586:                        p->places = i;
                    587:                } else {
                    588: #ifndef cc_weight
                    589:                        if ((p->weight += p->time - time) < 0)
                    590:                                p->weight = cc_weight;
                    591:                        else if ((p->weight += cc_weight) > limit)
                    592:                                p->weight = limit;
                    593: #endif
                    594:                        p->time = time;
                    595:                        p->bcount++;
                    596:                        if (!p->flag &&
                    597:                            /* code must be < 0 if flag false here */
                    598:                            (p->bcount >= threshold
                    599: #ifndef cc_weight
                    600:                             || p->weight >= wthreshold
                    601: #endif
                    602:                             )) {
                    603:                                p->flag = 1;
                    604:                                *pp++ = p;
                    605:                                qinsert(p, &cc_q1b);
                    606:                        }
                    607:                        places[i] = p->places;
                    608:                        p->places = i;
                    609:                }
                    610:        }
                    611:        if ((i = pp - tokens) > 0) {
                    612:                *pp = 0;
                    613:                if (cc_reverse)
                    614:                        cc_sweep_reverse(tokens, places);
                    615:                if (cc_sort && i > 1) {
                    616:                        int cc_token_compare();
                    617:                        qsort((char *) tokens, i, sizeof *tokens,
                    618:                              cc_token_compare);
                    619:                }
                    620:                if (cc_chop) {
                    621:                        if ((i = i * cc_chop / 100) == 0)
                    622:                                i = 1;
                    623:                        tokens[i] = 0;
                    624:                }
                    625:                i++;
                    626:        }
                    627:        return i;
                    628: }
                    629:
                    630: cc_sweep_reverse(pp, places)
1.5     ! mpech     631:        struct cc **pp;
        !           632:        short *places;
1.1       deraadt   633: {
1.5     ! mpech     634:        struct cc *p;
        !           635:        short front, back, t;
1.1       deraadt   636:
                    637:        while ((p = *pp++) != 0) {
                    638:                back = -1;
                    639:                t = p->places;
                    640:                /* the list is never empty */
                    641:                do {
                    642:                        front = places[t];
                    643:                        places[t] = back;
                    644:                        back = t;
                    645:                } while ((t = front) >= 0);
                    646:                p->places = back;
                    647:        }
                    648: }
                    649:
                    650: cc_compress_phase(output, bufsize, tokens, ntoken)
                    651:        struct cc **output;
                    652:        struct cc **tokens;
                    653: {
1.5     ! mpech     654:        int i;
1.1       deraadt   655:
                    656:        bzero((char *) output, bufsize * sizeof *output);
                    657:        for (i = 0; i < cc_npass0; i++)
                    658:                cc_compress_phase1(output, tokens, ntoken, 0);
                    659:        for (i = 0; i < cc_npass1; i++)
                    660:                cc_compress_phase1(output, tokens, ntoken, 1);
                    661:        cc_compress_cleanup(output, bufsize);
                    662: }
                    663:
                    664: cc_compress_phase1(output, tokens, ntoken, flag)
1.5     ! mpech     665:        struct cc **output;
1.1       deraadt   666:        struct cc **tokens;
                    667: {
1.5     ! mpech     668:        struct cc **pp;
1.1       deraadt   669: #ifdef STATS
1.5     ! mpech     670:        int i = 0;
1.1       deraadt   671:        int nt = 0, cc = 0, nc = 0;
                    672: #endif
                    673:
                    674: #ifdef STATS
                    675:        if (verbose >= 0)
                    676:                time_begin();
                    677:        if (verbose > 0)
                    678:                printf("Compress:");
                    679: #endif
                    680:        pp = tokens;
                    681:        while (pp < tokens + ntoken) {
                    682: #ifdef STATS
                    683:                if (verbose > 0) {
                    684:                        ntoken_stat = 0;
                    685:                        ccount_stat = 0;
                    686:                        ncover_stat = 0;
                    687:                        if (i > 2) {
                    688:                                printf("\n         ");
                    689:                                i = 0;
                    690:                        }
                    691:                        i++;
                    692:                        printf(" (%d", (*pp)->length);
                    693:                        (void) fflush(stdout);
                    694:                }
                    695: #endif
                    696:                pp += cc_compress(output, pp, flag);
                    697: #ifdef STATS
                    698:                if (verbose > 0) {
                    699:                        printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
                    700:                               ncover_stat);
                    701:                        nt += ntoken_stat;
                    702:                        cc += ccount_stat;
                    703:                        nc += ncover_stat;
                    704:                }
                    705: #endif
                    706:        }
                    707: #ifdef STATS
                    708:        if (verbose > 0)
                    709:                printf("\n   total: (%dt %du %dc)\n", nt, cc, nc);
                    710:        if (verbose >= 0)
                    711:                time_end();
                    712: #endif
                    713: }
                    714:
                    715: cc_compress_cleanup(output, bufsize)
1.5     ! mpech     716:        struct cc **output;
1.1       deraadt   717: {
1.5     ! mpech     718:        struct cc **end;
1.1       deraadt   719:
                    720:        /* the previous output phase may have been interrupted */
                    721:        qinsertq(&cc_q0b, &cc_q0a);
                    722:        for (end = output + bufsize; output < end;) {
1.5     ! mpech     723:                struct cc *p;
        !           724:                int length;
1.1       deraadt   725:                if ((p = *output) == 0) {
                    726:                        output++;
                    727:                        continue;
                    728:                }
                    729:                length = p->length;
                    730:                if (!p->flag) {
                    731:                } else if (p->code >= 0) {
                    732:                        qinsert(p, &cc_q0b);
                    733:                        p->flag = 0;
                    734:                } else if (p->ccount == 0) {
                    735:                        *output = 0;
                    736:                } else if (p->ccount >= thresh(length)
                    737: #ifndef cc_weight
                    738:                           || wthreshp(p->weight, length)
                    739: #endif
                    740:                           ) {
                    741:                        p->flag = 0;
                    742:                } else {
                    743:                        p->ccount = 0;
                    744:                        *output = 0;
                    745:                }
                    746:                output += length;
                    747:        }
                    748: }
                    749:
                    750: cc_compress(output, tokens, flag)
                    751:        struct cc **output;
                    752:        struct cc **tokens;
                    753:        char flag;
                    754: {
                    755:        struct cc **pp = tokens;
1.5     ! mpech     756:        struct cc *p = *pp++;
1.1       deraadt   757:        int length = p->length;
                    758:        int threshold = thresh(length);
                    759: #ifndef cc_weight
                    760:        short wthreshold = wthresh(length);
                    761: #endif
                    762:        short *places = cc_places[length];
                    763:        int *initial_scores = cc_initial_scores[length];
                    764:        int initial_score0 = put_token_score(length);
                    765:
                    766:        do {
                    767:                int score;
1.5     ! mpech     768:                struct cc_undo *undop;
1.1       deraadt   769:                int ccount;
                    770: #ifdef STATS
                    771:                int ncover;
                    772: #endif
                    773:                int i;
                    774:
                    775:                ccount = p->ccount;
                    776:                if ((short) ccount >= p->bcount)
                    777:                        continue;
                    778:                if (p->code >= 0 || ccount >= threshold)
                    779:                        score = 0;
                    780: #ifndef cc_weight
                    781:                else if (p->weight >= wthreshold)
                    782:                        /* allow one fewer match than normal */
                    783:                        /* XXX, should adjust for ccount */
                    784:                        score = - tt.tt_set_token_cost;
                    785: #endif
                    786:                else
                    787:                        score = initial_scores[ccount];
                    788:                undop = cc_undo;
                    789: #ifdef STATS
                    790:                ncover = 0;
                    791: #endif
                    792:                for (i = p->places; i >= 0; i = places[i]) {
1.5     ! mpech     793:                        struct cc **jp;
        !           794:                        struct cc *x;
        !           795:                        struct cc **ip = output + i;
        !           796:                        int score0 = initial_score0;
1.1       deraadt   797:                        struct cc **iip = ip + length;
                    798:                        struct cc_undo *undop1 = undop;
                    799:
                    800:                        if ((x = *(jp = ip)) != 0)
                    801:                                goto z;
                    802:                        while (--jp >= output)
                    803:                                if ((x = *jp) != 0) {
                    804:                                        if (jp + x->length > ip)
                    805:                                                goto z;
                    806:                                        break;
                    807:                                }
                    808:                        jp = ip + 1;
                    809:                        while (jp < iip) {
                    810:                                if ((x = *jp) == 0) {
                    811:                                        jp++;
                    812:                                        continue;
                    813:                                }
                    814:                        z:
                    815:                                if (x == p)
                    816:                                        goto undo;
                    817: #ifdef STATS
                    818:                                ncover++;
                    819: #endif
                    820:                                undop->pos = jp;
                    821:                                undop->val = x;
                    822:                                undop++;
                    823:                                *jp = 0;
                    824:                                x->ccount--;
                    825:                                score_adjust(score0, x);
                    826:                                if (score0 < 0 && flag)
                    827:                                        goto undo;
                    828:                                jp += x->length;
                    829:                        }
                    830:                        undop->pos = ip;
                    831:                        undop->val = 0;
                    832:                        undop++;
                    833:                        *ip = p;
                    834:                        ccount++;
                    835:                        score += score0;
                    836:                        continue;
                    837:                undo:
                    838:                        while (--undop >= undop1)
                    839:                                if (*undop->pos = x = undop->val)
                    840:                                        x->ccount++;
                    841:                        undop++;
                    842:                }
                    843:                if (score > 0) {
                    844: #ifdef STATS
                    845:                        ccount_stat += ccount - p->ccount;
                    846:                        ntoken_stat++;
                    847:                        ncover_stat += ncover;
                    848: #endif
                    849:                        p->ccount = ccount;
                    850:                } else {
1.5     ! mpech     851:                        struct cc_undo *u = cc_undo;
1.1       deraadt   852:                        while (--undop >= u) {
1.5     ! mpech     853:                                struct cc *x;
1.1       deraadt   854:                                if (*undop->pos = x = undop->val)
                    855:                                        x->ccount++;
                    856:                        }
                    857:                }
                    858:        } while ((p = *pp++) != 0);
                    859:        return pp - tokens;
                    860: }
                    861:
                    862: cc_output_phase(buffer, output, bufsize)
1.5     ! mpech     863:        char *buffer;
        !           864:        struct cc **output;
        !           865:        int bufsize;
1.1       deraadt   866: {
1.5     ! mpech     867:        int i;
        !           868:        struct cc *p, *p1;
1.1       deraadt   869:
                    870:        for (i = 0; i < bufsize;) {
                    871:                if ((p = output[i]) == 0) {
                    872:                        ttputc(buffer[i]);
                    873:                        i++;
                    874:                } else if (p->code >= 0) {
                    875:                        if (--p->ccount == 0)
                    876:                                qinsert(p, &cc_q0a);
                    877:                        (*tt.tt_put_token)(p->code, p->string, p->length);
                    878:                        wwntokuse++;
                    879:                        wwntoksave += put_token_score(p->length);
                    880:                        i += p->length;
                    881:                } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
                    882:                        p->code = p1->code;
                    883:                        p1->code = -1;
                    884:                        qinsert(p1, &cc_q1a);
                    885:                        if (--p->ccount == 0)
                    886:                                qinsert(p, &cc_q0a);
                    887:                        else
                    888:                                qinsert(p, &cc_q0b);
                    889:                        (*tt.tt_set_token)(p->code, p->string, p->length);
                    890:                        wwntokdef++;
                    891:                        wwntoksave -= tt.tt_set_token_cost;
                    892:                        i += p->length;
                    893:                } else {
                    894:                        p->ccount--;
                    895:                        ttwrite(p->string, p->length);
                    896:                        wwntokbad++;
                    897:                        i += p->length;
                    898:                }
                    899:        }
                    900:        wwntokc += bufsize;
                    901: }
                    902:
                    903: cc_token_compare(p1, p2)
                    904:        struct cc **p1, **p2;
                    905: {
                    906:        return (*p2)->bcount - (*p1)->bcount;
                    907: }