src/usr.bin/diff/diffreg.c - annotate

Return to diffreg.c CVS log
Up to [local] / src / usr.bin / diff
Annotation of src/usr.bin/diff/diffreg.c, Revision 1.2

1.2     ! deraadt     1: /*     $OpenBSD$       */
        !             2:
        !             3: /*
        !             4:  * Copyright (C) Caldera International Inc.  2001-2002.
        !             5:  * All rights reserved.
        !             6:  *
        !             7:  * Redistribution and use in source and binary forms, with or without
        !             8:  * modification, are permitted provided that the following conditions
        !             9:  * are met:
        !            10:  * 1. Redistributions of source code and documentation must retain the above
        !            11:  *    copyright notice, this list of conditions and the following disclaimer.
        !            12:  * 2. Redistributions in binary form must reproduce the above copyright
        !            13:  *    notice, this list of conditions and the following disclaimer in the
        !            14:  *    documentation and/or other materials provided with the distribution.
        !            15:  * 3. All advertising materials mentioning features or use of this software
        !            16:  *    must display the following acknowledgement:
        !            17:  *     This product includes software developed or owned by Caldera
        !            18:  *     International, Inc.
        !            19:  * 4. Neither the name of Caldera International, Inc. nor the names of other
        !            20:  *    contributors may be used to endorse or promote products derived from
        !            21:  *    this software without specific prior written permission.
        !            22:  *
        !            23:  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
        !            24:  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
        !            25:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
        !            26:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
        !            27:  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
        !            28:  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
        !            29:  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
        !            30:  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            31:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
        !            32:  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
        !            33:  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
        !            34:  * POSSIBILITY OF SUCH DAMAGE.
        !            35:  */
        !            36:
1.1       deraadt    37: static char sccsid[] = "@(#)diffreg.c 4.21 4/6/90";
                     38:
                     39: #include "diff.h"
                     40: #include "pathnames.h"
                     41: /*
                     42:  * diff - compare two files.
                     43:  */
                     44:
                     45: /*
                     46:  *     Uses an algorithm due to Harold Stone, which finds
                     47:  *     a pair of longest identical subsequences in the two
                     48:  *     files.
                     49:  *
                     50:  *     The major goal is to generate the match vector J.
                     51:  *     J[i] is the index of the line in file1 corresponding
                     52:  *     to line i file0. J[i] = 0 if there is no
                     53:  *     such line in file1.
                     54:  *
                     55:  *     Lines are hashed so as to work in core. All potential
                     56:  *     matches are located by sorting the lines of each file
                     57:  *     on the hash (called ``value''). In particular, this
                     58:  *     collects the equivalence classes in file1 together.
                     59:  *     Subroutine equiv replaces the value of each line in
                     60:  *     file0 by the index of the first element of its
                     61:  *     matching equivalence in (the reordered) file1.
                     62:  *     To save space equiv squeezes file1 into a single
                     63:  *     array member in which the equivalence classes
                     64:  *     are simply concatenated, except that their first
                     65:  *     members are flagged by changing sign.
                     66:  *
                     67:  *     Next the indices that point into member are unsorted into
                     68:  *     array class according to the original order of file0.
                     69:  *
                     70:  *     The cleverness lies in routine stone. This marches
                     71:  *     through the lines of file0, developing a vector klist
                     72:  *     of "k-candidates". At step i a k-candidate is a matched
                     73:  *     pair of lines x,y (x in file0 y in file1) such that
                     74:  *     there is a common subsequence of length k
                     75:  *     between the first i lines of file0 and the first y
                     76:  *     lines of file1, but there is no such subsequence for
                     77:  *     any smaller y. x is the earliest possible mate to y
                     78:  *     that occurs in such a subsequence.
                     79:  *
                     80:  *     Whenever any of the members of the equivalence class of
                     81:  *     lines in file1 matable to a line in file0 has serial number
                     82:  *     less than the y of some k-candidate, that k-candidate
                     83:  *     with the smallest such y is replaced. The new
                     84:  *     k-candidate is chained (via pred) to the current
                     85:  *     k-1 candidate so that the actual subsequence can
                     86:  *     be recovered. When a member has serial number greater
                     87:  *     that the y of all k-candidates, the klist is extended.
                     88:  *     At the end, the longest subsequence is pulled out
                     89:  *     and placed in the array J by unravel
                     90:  *
                     91:  *     With J in hand, the matches there recorded are
                     92:  *     check'ed against reality to assure that no spurious
                     93:  *     matches have crept in due to hashing. If they have,
                     94:  *     they are broken, and "jackpot" is recorded--a harmless
                     95:  *     matter except that a true match for a spuriously
                     96:  *     mated line may now be unnecessarily reported as a change.
                     97:  *
                     98:  *     Much of the complexity of the program comes simply
                     99:  *     from trying to minimize core utilization and
                    100:  *     maximize the range of doable problems by dynamically
                    101:  *     allocating what is needed and reusing what is not.
                    102:  *     The core requirements for problems larger than somewhat
                    103:  *     are (in words) 2*length(file0) + length(file1) +
                    104:  *     3*(number of k-candidates installed),  typically about
                    105:  *     6n words for files of length n.
                    106:  */
                    107:
                    108: #define        prints(s)       fputs(s,stdout)
                    109:
                    110: FILE   *input[2];
                    111: FILE   *fopen();
                    112:
                    113: struct cand {
                    114:        int     x;
                    115:        int     y;
                    116:        int     pred;
                    117: } cand;
                    118: struct line {
                    119:        int     serial;
                    120:        int     value;
                    121: } *file[2], line;
                    122: int    len[2];
                    123: struct line *sfile[2]; /* shortened by pruning common prefix and suffix */
                    124: int    slen[2];
                    125: int    pref, suff;     /* length of prefix and suffix */
                    126: int    *class;         /* will be overlaid on file[0] */
                    127: int    *member;        /* will be overlaid on file[1] */
                    128: int    *klist;         /* will be overlaid on file[0] after class */
                    129: struct cand *clist;    /* merely a free storage pot for candidates */
                    130: int    clen = 0;
                    131: int    *J;             /* will be overlaid on class */
                    132: long   *ixold;         /* will be overlaid on klist */
                    133: long   *ixnew;         /* will be overlaid on file[1] */
                    134: char   *chrtran;       /* translation table for case-folding */
                    135:
                    136: /* chrtran points to one of 2 translation tables:
                    137:  *     cup2low if folding upper to lower case
                    138:  *     clow2low if not folding case
                    139:  */
                    140: char   clow2low[256] = {
                    141: 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
                    142: 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
                    143: 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
                    144: 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
                    145: 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
                    146: 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
                    147: 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
                    148: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
                    149: 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
                    150: 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
                    151: 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
                    152: 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
                    153: 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
                    154: 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
                    155: 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
                    156: 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
                    157: };
                    158:
                    159: char   cup2low[256] = {
                    160: 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
                    161: 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
                    162: 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
                    163: 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
                    164: 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
                    165: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
                    166: 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
                    167: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
                    168: 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
                    169: 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
                    170: 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
                    171: 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
                    172: 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
                    173: 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
                    174: 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
                    175: 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
                    176: };
                    177:
                    178: diffreg()
                    179: {
                    180:        register int i, j;
                    181:        FILE *f1, *f2;
                    182:        char buf1[BUFSIZ], buf2[BUFSIZ];
                    183:
                    184:        if (hflag) {
                    185:                diffargv[0] = "diffh";
                    186:                execv(diffh, diffargv);
                    187:                fprintf(stderr, "diff: ");
                    188:                perror(diffh);
                    189:                done();
                    190:        }
                    191:        chrtran = (iflag? cup2low : clow2low);
                    192:        if ((stb1.st_mode & S_IFMT) == S_IFDIR) {
                    193:                file1 = splice(file1, file2);
                    194:                if (stat(file1, &stb1) < 0) {
                    195:                        fprintf(stderr, "diff: ");
                    196:                        perror(file1);
                    197:                        done();
                    198:                }
                    199:        } else if ((stb2.st_mode & S_IFMT) == S_IFDIR) {
                    200:                file2 = splice(file2, file1);
                    201:                if (stat(file2, &stb2) < 0) {
                    202:                        fprintf(stderr, "diff: ");
                    203:                        perror(file2);
                    204:                        done();
                    205:                }
                    206:        } else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) {
                    207:                if (!strcmp(file2, "-")) {
                    208:                        fprintf(stderr, "diff: can't specify - -\n");
                    209:                        done();
                    210:                }
                    211:                file1 = copytemp();
                    212:                if (stat(file1, &stb1) < 0) {
                    213:                        fprintf(stderr, "diff: ");
                    214:                        perror(file1);
                    215:                        done();
                    216:                }
                    217:        } else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) {
                    218:                file2 = copytemp();
                    219:                if (stat(file2, &stb2) < 0) {
                    220:                        fprintf(stderr, "diff: ");
                    221:                        perror(file2);
                    222:                        done();
                    223:                }
                    224:        }
                    225:        if ((f1 = fopen(file1, "r")) == NULL) {
                    226:                fprintf(stderr, "diff: ");
                    227:                perror(file1);
                    228:                done();
                    229:        }
                    230:        if ((f2 = fopen(file2, "r")) == NULL) {
                    231:                fprintf(stderr, "diff: ");
                    232:                perror(file2);
                    233:                fclose(f1);
                    234:                done();
                    235:        }
                    236:        if (stb1.st_size != stb2.st_size)
                    237:                goto notsame;
                    238:        for (;;) {
                    239:                i = fread(buf1, 1, BUFSIZ, f1);
                    240:                j = fread(buf2, 1, BUFSIZ, f2);
                    241:                if (i < 0 || j < 0 || i != j)
                    242:                        goto notsame;
                    243:                if (i == 0 && j == 0) {
                    244:                        fclose(f1);
                    245:                        fclose(f2);
                    246:                        status = 0;             /* files don't differ */
                    247:                        goto same;
                    248:                }
                    249:                for (j = 0; j < i; j++)
                    250:                        if (buf1[j] != buf2[j])
                    251:                                goto notsame;
                    252:        }
                    253: notsame:
                    254:        /*
                    255:         *      Files certainly differ at this point; set status accordingly
                    256:         */
                    257:        status = 1;
                    258:        if (!asciifile(f1) || !asciifile(f2)) {
                    259:                printf("Binary files %s and %s differ\n", file1, file2);
                    260:                fclose(f1);
                    261:                fclose(f2);
                    262:                done();
                    263:        }
                    264:        prepare(0, f1);
                    265:        prepare(1, f2);
                    266:        fclose(f1);
                    267:        fclose(f2);
                    268:        prune();
                    269:        sort(sfile[0],slen[0]);
                    270:        sort(sfile[1],slen[1]);
                    271:
                    272:        member = (int *)file[1];
                    273:        equiv(sfile[0], slen[0], sfile[1], slen[1], member);
                    274:        member = (int *)ralloc((char *)member,(slen[1]+2)*sizeof(int));
                    275:
                    276:        class = (int *)file[0];
                    277:        unsort(sfile[0], slen[0], class);
                    278:        class = (int *)ralloc((char *)class,(slen[0]+2)*sizeof(int));
                    279:
                    280:        klist = (int *)talloc((slen[0]+2)*sizeof(int));
                    281:        clist = (struct cand *)talloc(sizeof(cand));
                    282:        i = stone(class, slen[0], member, klist);
                    283:        free((char *)member);
                    284:        free((char *)class);
                    285:
                    286:        J = (int *)talloc((len[0]+2)*sizeof(int));
                    287:        unravel(klist[i]);
                    288:        free((char *)clist);
                    289:        free((char *)klist);
                    290:
                    291:        ixold = (long *)talloc((len[0]+2)*sizeof(long));
                    292:        ixnew = (long *)talloc((len[1]+2)*sizeof(long));
                    293:        check();
                    294:        output();
                    295:        status = anychange;
                    296: same:
                    297:        if (opt == D_CONTEXT && anychange == 0)
                    298:                printf("No differences encountered\n");
                    299:        done();
                    300: }
                    301:
                    302: char *tempfile = _PATH_TMP;
                    303:
                    304: char *
                    305: copytemp()
                    306: {
                    307:        char buf[BUFSIZ];
                    308:        register int i, f;
                    309:
                    310:        signal(SIGHUP,done);
                    311:        signal(SIGINT,done);
                    312:        signal(SIGPIPE,done);
                    313:        signal(SIGTERM,done);
                    314:        mktemp(tempfile);
                    315:        f = creat(tempfile,0600);
                    316:        if (f < 0) {
                    317:                fprintf(stderr, "diff: ");
                    318:                perror(tempfile);
                    319:                done();
                    320:        }
                    321:        while ((i = read(0,buf,BUFSIZ)) > 0)
                    322:                if (write(f,buf,i) != i) {
                    323:                        fprintf(stderr, "diff: ");
                    324:                        perror(tempfile);
                    325:                        done();
                    326:                }
                    327:        close(f);
                    328:        return (tempfile);
                    329: }
                    330:
                    331: char *
                    332: splice(dir, file)
                    333:        char *dir, *file;
                    334: {
                    335:        char *tail;
                    336:        char buf[BUFSIZ];
                    337:
                    338:        if (!strcmp(file, "-")) {
                    339:                fprintf(stderr, "diff: can't specify - with other arg directory\n");
                    340:                done();
                    341:        }
                    342:        tail = rindex(file, '/');
                    343:        if (tail == 0)
                    344:                tail = file;
                    345:        else
                    346:                tail++;
                    347:        (void)sprintf(buf, "%s/%s", dir, tail);
                    348:        return (savestr(buf));
                    349: }
                    350:
                    351: prepare(i, fd)
                    352:        int i;
                    353:        FILE *fd;
                    354: {
                    355:        register struct line *p;
                    356:        register j,h;
                    357:
                    358:        fseek(fd, (long)0, 0);
                    359:        p = (struct line *)talloc(3*sizeof(line));
                    360:        for(j=0; h=readhash(fd);) {
                    361:                p = (struct line *)ralloc((char *)p,(++j+3)*sizeof(line));
                    362:                p[j].value = h;
                    363:        }
                    364:        len[i] = j;
                    365:        file[i] = p;
                    366: }
                    367:
                    368: prune()
                    369: {
                    370:        register i,j;
                    371:        for(pref=0;pref<len[0]&&pref<len[1]&&
                    372:                file[0][pref+1].value==file[1][pref+1].value;
                    373:                pref++ ) ;
                    374:        for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&
                    375:                file[0][len[0]-suff].value==file[1][len[1]-suff].value;
                    376:                suff++) ;
                    377:        for(j=0;j<2;j++) {
                    378:                sfile[j] = file[j]+pref;
                    379:                slen[j] = len[j]-pref-suff;
                    380:                for(i=0;i<=slen[j];i++)
                    381:                        sfile[j][i].serial = i;
                    382:        }
                    383: }
                    384:
                    385: equiv(a,n,b,m,c)
                    386: struct line *a, *b;
                    387: int *c;
                    388: {
                    389:        register int i, j;
                    390:        i = j = 1;
                    391:        while(i<=n && j<=m) {
                    392:                if(a[i].value <b[j].value)
                    393:                        a[i++].value = 0;
                    394:                else if(a[i].value == b[j].value)
                    395:                        a[i++].value = j;
                    396:                else
                    397:                        j++;
                    398:        }
                    399:        while(i <= n)
                    400:                a[i++].value = 0;
                    401:        b[m+1].value = 0;
                    402:        j = 0;
                    403:        while(++j <= m) {
                    404:                c[j] = -b[j].serial;
                    405:                while(b[j+1].value == b[j].value) {
                    406:                        j++;
                    407:                        c[j] = b[j].serial;
                    408:                }
                    409:        }
                    410:        c[j] = -1;
                    411: }
                    412:
                    413: stone(a,n,b,c)
                    414: int *a;
                    415: int *b;
                    416: register int *c;
                    417: {
                    418:        register int i, k,y;
                    419:        int j, l;
                    420:        int oldc, tc;
                    421:        int oldl;
                    422:        k = 0;
                    423:        c[0] = newcand(0,0,0);
                    424:        for(i=1; i<=n; i++) {
                    425:                j = a[i];
                    426:                if(j==0)
                    427:                        continue;
                    428:                y = -b[j];
                    429:                oldl = 0;
                    430:                oldc = c[0];
                    431:                do {
                    432:                        if(y <= clist[oldc].y)
                    433:                                continue;
                    434:                        l = search(c, k, y);
                    435:                        if(l!=oldl+1)
                    436:                                oldc = c[l-1];
                    437:                        if(l<=k) {
                    438:                                if(clist[c[l]].y <= y)
                    439:                                        continue;
                    440:                                tc = c[l];
                    441:                                c[l] = newcand(i,y,oldc);
                    442:                                oldc = tc;
                    443:                                oldl = l;
                    444:                        } else {
                    445:                                c[l] = newcand(i,y,oldc);
                    446:                                k++;
                    447:                                break;
                    448:                        }
                    449:                } while((y=b[++j]) > 0);
                    450:        }
                    451:        return(k);
                    452: }
                    453:
                    454: newcand(x,y,pred)
                    455: {
                    456:        register struct cand *q;
                    457:        clist = (struct cand *)ralloc((char *)clist,++clen*sizeof(cand));
                    458:        q = clist + clen -1;
                    459:        q->x = x;
                    460:        q->y = y;
                    461:        q->pred = pred;
                    462:        return(clen-1);
                    463: }
                    464:
                    465: search(c, k, y)
                    466: int *c;
                    467: {
                    468:        register int i, j, l;
                    469:        int t;
                    470:        if(clist[c[k]].y<y)     /*quick look for typical case*/
                    471:                return(k+1);
                    472:        i = 0;
                    473:        j = k+1;
                    474:        while (1) {
                    475:                l = i + j;
                    476:                if ((l >>= 1) <= i)
                    477:                        break;
                    478:                t = clist[c[l]].y;
                    479:                if(t > y)
                    480:                        j = l;
                    481:                else if(t < y)
                    482:                        i = l;
                    483:                else
                    484:                        return(l);
                    485:        }
                    486:        return(l+1);
                    487: }
                    488:
                    489: unravel(p)
                    490: {
                    491:        register int i;
                    492:        register struct cand *q;
                    493:        for(i=0; i<=len[0]; i++)
                    494:                J[i] =  i<=pref ? i:
                    495:                        i>len[0]-suff ? i+len[1]-len[0]:
                    496:                        0;
                    497:        for(q=clist+p;q->y!=0;q=clist+q->pred)
                    498:                J[q->x+pref] = q->y+pref;
                    499: }
                    500:
                    501: /* check does double duty:
                    502: 1.  ferret out any fortuitous correspondences due
                    503: to confounding by hashing (which result in "jackpot")
                    504: 2.  collect random access indexes to the two files */
                    505:
                    506: check()
                    507: {
                    508:        register int i, j;
                    509:        int jackpot;
                    510:        long ctold, ctnew;
                    511:        register int c,d;
                    512:
                    513:        if ((input[0] = fopen(file1,"r")) == NULL) {
                    514:                perror(file1);
                    515:                done();
                    516:        }
                    517:        if ((input[1] = fopen(file2,"r")) == NULL) {
                    518:                perror(file2);
                    519:                done();
                    520:        }
                    521:        j = 1;
                    522:        ixold[0] = ixnew[0] = 0;
                    523:        jackpot = 0;
                    524:        ctold = ctnew = 0;
                    525:        for(i=1;i<=len[0];i++) {
                    526:                if(J[i]==0) {
                    527:                        ixold[i] = ctold += skipline(0);
                    528:                        continue;
                    529:                }
                    530:                while(j<J[i]) {
                    531:                        ixnew[j] = ctnew += skipline(1);
                    532:                        j++;
                    533:                }
                    534:                if(bflag || wflag || iflag) {
                    535:                        for(;;) {
                    536:                                c = getc(input[0]);
                    537:                                d = getc(input[1]);
                    538:                                ctold++;
                    539:                                ctnew++;
                    540:                                if(bflag && isspace(c) && isspace(d)) {
                    541:                                        do {
                    542:                                                if(c=='\n')
                    543:                                                        break;
                    544:                                                ctold++;
                    545:                                        } while(isspace(c=getc(input[0])));
                    546:                                        do {
                    547:                                                if(d=='\n')
                    548:                                                        break;
                    549:                                                ctnew++;
                    550:                                        } while(isspace(d=getc(input[1])));
                    551:                                } else if ( wflag ) {
                    552:                                        while( isspace(c) && c!='\n' ) {
                    553:                                                c=getc(input[0]);
                    554:                                                ctold++;
                    555:                                        }
                    556:                                        while( isspace(d) && d!='\n' ) {
                    557:                                                d=getc(input[1]);
                    558:                                                ctnew++;
                    559:                                        }
                    560:                                }
                    561:                                if(chrtran[c] != chrtran[d]) {
                    562:                                        jackpot++;
                    563:                                        J[i] = 0;
                    564:                                        if(c!='\n')
                    565:                                                ctold += skipline(0);
                    566:                                        if(d!='\n')
                    567:                                                ctnew += skipline(1);
                    568:                                        break;
                    569:                                }
                    570:                                if(c=='\n')
                    571:                                        break;
                    572:                        }
                    573:                } else {
                    574:                        for(;;) {
                    575:                                ctold++;
                    576:                                ctnew++;
                    577:                                if((c=getc(input[0])) != (d=getc(input[1]))) {
                    578:                                        /* jackpot++; */
                    579:                                        J[i] = 0;
                    580:                                        if(c!='\n')
                    581:                                                ctold += skipline(0);
                    582:                                        if(d!='\n')
                    583:                                                ctnew += skipline(1);
                    584:                                        break;
                    585:                                }
                    586:                                if(c=='\n')
                    587:                                        break;
                    588:                        }
                    589:                }
                    590:                ixold[i] = ctold;
                    591:                ixnew[j] = ctnew;
                    592:                j++;
                    593:        }
                    594:        for(;j<=len[1];j++) {
                    595:                ixnew[j] = ctnew += skipline(1);
                    596:        }
                    597:        fclose(input[0]);
                    598:        fclose(input[1]);
                    599: /*
                    600:        if(jackpot)
                    601:                fprintf(stderr, "jackpot\n");
                    602: */
                    603: }
                    604:
                    605: sort(a,n)      /*shellsort CACM #201*/
                    606: struct line *a;
                    607: {
                    608:        struct line w;
                    609:        register int j,m;
                    610:        struct line *ai;
                    611:        register struct line *aim;
                    612:        int k;
                    613:
                    614:        if (n == 0)
                    615:                return;
                    616:        for(j=1;j<=n;j*= 2)
                    617:                m = 2*j - 1;
                    618:        for(m/=2;m!=0;m/=2) {
                    619:                k = n-m;
                    620:                for(j=1;j<=k;j++) {
                    621:                        for(ai = &a[j]; ai > a; ai -= m) {
                    622:                                aim = &ai[m];
                    623:                                if(aim < ai)
                    624:                                        break;  /*wraparound*/
                    625:                                if(aim->value > ai[0].value ||
                    626:                                   aim->value == ai[0].value &&
                    627:                                   aim->serial > ai[0].serial)
                    628:                                        break;
                    629:                                w.value = ai[0].value;
                    630:                                ai[0].value = aim->value;
                    631:                                aim->value = w.value;
                    632:                                w.serial = ai[0].serial;
                    633:                                ai[0].serial = aim->serial;
                    634:                                aim->serial = w.serial;
                    635:                        }
                    636:                }
                    637:        }
                    638: }
                    639:
                    640: unsort(f, l, b)
                    641: struct line *f;
                    642: int *b;
                    643: {
                    644:        register int *a;
                    645:        register int i;
                    646:        a = (int *)talloc((l+1)*sizeof(int));
                    647:        for(i=1;i<=l;i++)
                    648:                a[f[i].serial] = f[i].value;
                    649:        for(i=1;i<=l;i++)
                    650:                b[i] = a[i];
                    651:        free((char *)a);
                    652: }
                    653:
                    654: skipline(f)
                    655: {
                    656:        register i, c;
                    657:
                    658:        for(i=1;(c=getc(input[f]))!='\n';i++)
                    659:                if (c < 0)
                    660:                        return(i);
                    661:        return(i);
                    662: }
                    663:
                    664: output()
                    665: {
                    666:        int m;
                    667:        register int i0, i1, j1;
                    668:        int j0;
                    669:        input[0] = fopen(file1,"r");
                    670:        input[1] = fopen(file2,"r");
                    671:        m = len[0];
                    672:        J[0] = 0;
                    673:        J[m+1] = len[1]+1;
                    674:        if(opt!=D_EDIT) for(i0=1;i0<=m;i0=i1+1) {
                    675:                while(i0<=m&&J[i0]==J[i0-1]+1) i0++;
                    676:                j0 = J[i0-1]+1;
                    677:                i1 = i0-1;
                    678:                while(i1<m&&J[i1+1]==0) i1++;
                    679:                j1 = J[i1+1]-1;
                    680:                J[i1] = j1;
                    681:                change(i0,i1,j0,j1);
                    682:        } else for(i0=m;i0>=1;i0=i1-1) {
                    683:                while(i0>=1&&J[i0]==J[i0+1]-1&&J[i0]!=0) i0--;
                    684:                j0 = J[i0+1]-1;
                    685:                i1 = i0+1;
                    686:                while(i1>1&&J[i1-1]==0) i1--;
                    687:                j1 = J[i1-1]+1;
                    688:                J[i1] = j1;
                    689:                change(i1,i0,j1,j0);
                    690:        }
                    691:        if(m==0)
                    692:                change(1,0,1,len[1]);
                    693:        if (opt==D_IFDEF) {
                    694:                for (;;) {
                    695: #define        c i0
                    696:                        c = getc(input[0]);
                    697:                        if (c < 0)
                    698:                                return;
                    699:                        putchar(c);
                    700:                }
                    701: #undef c
                    702:        }
                    703:        if (anychange && opt == D_CONTEXT)
                    704:                dump_context_vec();
                    705: }
                    706:
                    707: /*
                    708:  * The following struct is used to record change information when
                    709:  * doing a "context" diff.  (see routine "change" to understand the
                    710:  * highly mneumonic field names)
                    711:  */
                    712: struct context_vec {
                    713:        int     a;      /* start line in old file */
                    714:        int     b;      /* end line in old file */
                    715:        int     c;      /* start line in new file */
                    716:        int     d;      /* end line in new file */
                    717: };
                    718:
                    719: struct context_vec     *context_vec_start,
                    720:                        *context_vec_end,
                    721:                        *context_vec_ptr;
                    722:
                    723: #define        MAX_CONTEXT     128
                    724:
                    725: /* indicate that there is a difference between lines a and b of the from file
                    726:    to get to lines c to d of the to file.
                    727:    If a is greater then b then there are no lines in the from file involved
                    728:    and this means that there were lines appended (beginning at b).
                    729:    If c is greater than d then there are lines missing from the to file.
                    730: */
                    731: change(a,b,c,d)
                    732: {
                    733:        int lowa,upb,lowc,upd;
                    734:        struct stat stbuf;
                    735:
                    736:        if (opt != D_IFDEF && a>b && c>d)
                    737:                return;
                    738:        if (anychange == 0) {
                    739:                anychange = 1;
                    740:                if(opt == D_CONTEXT) {
                    741:                        printf("*** %s  ", file1);
                    742:                        stat(file1, &stbuf);
                    743:                        printf("%s--- %s        ",
                    744:                            ctime(&stbuf.st_mtime), file2);
                    745:                        stat(file2, &stbuf);
                    746:                        printf("%s", ctime(&stbuf.st_mtime));
                    747:
                    748:                        context_vec_start = (struct context_vec *)
                    749:                                                malloc(MAX_CONTEXT *
                    750:                                                   sizeof(struct context_vec));
                    751:                        context_vec_end = context_vec_start + MAX_CONTEXT;
                    752:                        context_vec_ptr = context_vec_start - 1;
                    753:                }
                    754:        }
                    755:        if(opt == D_CONTEXT) {
                    756:                /*
                    757:                 * if this new change is within 'context' lines of
                    758:                 * the previous change, just add it to the change
                    759:                 * record.  If the record is full or if this
                    760:                 * change is more than 'context' lines from the previous
                    761:                 * change, dump the record, reset it & add the new change.
                    762:                 */
                    763:                if ( context_vec_ptr >= context_vec_end ||
                    764:                     ( context_vec_ptr >= context_vec_start &&
                    765:                       a > (context_vec_ptr->b + 2*context) &&
                    766:                       c > (context_vec_ptr->d + 2*context) ) )
                    767:                        dump_context_vec();
                    768:
                    769:                context_vec_ptr++;
                    770:                context_vec_ptr->a = a;
                    771:                context_vec_ptr->b = b;
                    772:                context_vec_ptr->c = c;
                    773:                context_vec_ptr->d = d;
                    774:                return;
                    775:        }
                    776:        switch (opt) {
                    777:
                    778:        case D_NORMAL:
                    779:        case D_EDIT:
                    780:                range(a,b,",");
                    781:                putchar(a>b?'a':c>d?'d':'c');
                    782:                if(opt==D_NORMAL)
                    783:                        range(c,d,",");
                    784:                putchar('\n');
                    785:                break;
                    786:        case D_REVERSE:
                    787:                putchar(a>b?'a':c>d?'d':'c');
                    788:                range(a,b," ");
                    789:                putchar('\n');
                    790:                break;
                    791:         case D_NREVERSE:
                    792:                 if (a>b)
                    793:                         printf("a%d %d\n",b,d-c+1);
                    794:                 else {
                    795:                         printf("d%d %d\n",a,b-a+1);
                    796:                         if (!(c>d))
                    797:                            /* add changed lines */
                    798:                            printf("a%d %d\n",b, d-c+1);
                    799:                 }
                    800:                 break;
                    801:        }
                    802:        if(opt == D_NORMAL || opt == D_IFDEF) {
                    803:                fetch(ixold,a,b,input[0],"< ", 1);
                    804:                if(a<=b&&c<=d && opt == D_NORMAL)
                    805:                        prints("---\n");
                    806:        }
                    807:        fetch(ixnew,c,d,input[1],opt==D_NORMAL?"> ":"", 0);
                    808:        if ((opt ==D_EDIT || opt == D_REVERSE) && c<=d)
                    809:                prints(".\n");
                    810:        if (inifdef) {
                    811:                fprintf(stdout, "#endif /* %s */\n", endifname);
                    812:                inifdef = 0;
                    813:        }
                    814: }
                    815:
                    816: range(a,b,separator)
                    817: char *separator;
                    818: {
                    819:        printf("%d", a>b?b:a);
                    820:        if(a<b) {
                    821:                printf("%s%d", separator, b);
                    822:        }
                    823: }
                    824:
                    825: fetch(f,a,b,lb,s,oldfile)
                    826: long *f;
                    827: FILE *lb;
                    828: char *s;
                    829: {
                    830:        register int i, j;
                    831:        register int c;
                    832:        register int col;
                    833:        register int nc;
                    834:        int oneflag = (*ifdef1!='\0') != (*ifdef2!='\0');
                    835:
                    836:        /*
                    837:         * When doing #ifdef's, copy down to current line
                    838:         * if this is the first file, so that stuff makes it to output.
                    839:         */
                    840:        if (opt == D_IFDEF && oldfile){
                    841:                long curpos = ftell(lb);
                    842:                /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
                    843:                nc = f[a>b? b : a-1 ] - curpos;
                    844:                for (i = 0; i < nc; i++)
                    845:                        putchar(getc(lb));
                    846:        }
                    847:        if (a > b)
                    848:                return;
                    849:        if (opt == D_IFDEF) {
                    850:                if (inifdef)
                    851:                        fprintf(stdout, "#else /* %s%s */\n", oneflag && oldfile==1 ? "!" : "", ifdef2);
                    852:                else {
                    853:                        if (oneflag) {
                    854:                                /* There was only one ifdef given */
                    855:                                endifname = ifdef2;
                    856:                                if (oldfile)
                    857:                                        fprintf(stdout, "#ifndef %s\n", endifname);
                    858:                                else
                    859:                                        fprintf(stdout, "#ifdef %s\n", endifname);
                    860:                        }
                    861:                        else {
                    862:                                endifname = oldfile ? ifdef1 : ifdef2;
                    863:                                fprintf(stdout, "#ifdef %s\n", endifname);
                    864:                        }
                    865:                }
                    866:                inifdef = 1+oldfile;
                    867:        }
                    868:
                    869:        for(i=a;i<=b;i++) {
                    870:                fseek(lb,f[i-1],0);
                    871:                nc = f[i]-f[i-1];
                    872:                if (opt != D_IFDEF)
                    873:                        prints(s);
                    874:                col = 0;
                    875:                for(j=0;j<nc;j++) {
                    876:                        c = getc(lb);
                    877:                        if (c == '\t' && tflag)
                    878:                                do
                    879:                                        putchar(' ');
                    880:                                while (++col & 7);
                    881:                        else {
                    882:                                putchar(c);
                    883:                                col++;
                    884:                        }
                    885:                }
                    886:        }
                    887:
                    888:        if (inifdef && !wantelses) {
                    889:                fprintf(stdout, "#endif /* %s */\n", endifname);
                    890:                inifdef = 0;
                    891:        }
                    892: }
                    893:
                    894: #define POW2                   /* define only if HALFLONG is 2**n */
                    895: #define HALFLONG 16
                    896: #define low(x) (x&((1L<<HALFLONG)-1))
                    897: #define high(x)        (x>>HALFLONG)
                    898:
                    899: /*
                    900:  * hashing has the effect of
                    901:  * arranging line in 7-bit bytes and then
                    902:  * summing 1-s complement in 16-bit hunks
                    903:  */
                    904: readhash(f)
                    905: register FILE *f;
                    906: {
                    907:        register long sum;
                    908:        register unsigned shift;
                    909:        register t;
                    910:        register space;
                    911:
                    912:        sum = 1;
                    913:        space = 0;
                    914:        if(!bflag && !wflag) {
                    915:                if(iflag)
                    916:                        for(shift=0;(t=getc(f))!='\n';shift+=7) {
                    917:                                if(t==-1)
                    918:                                        return(0);
                    919:                                sum += (long)chrtran[t] << (shift
                    920: #ifdef POW2
                    921:                                    &= HALFLONG - 1);
                    922: #else
                    923:                                    %= HALFLONG);
                    924: #endif
                    925:                        }
                    926:                else
                    927:                        for(shift=0;(t=getc(f))!='\n';shift+=7) {
                    928:                                if(t==-1)
                    929:                                        return(0);
                    930:                                sum += (long)t << (shift
                    931: #ifdef POW2
                    932:                                    &= HALFLONG - 1);
                    933: #else
                    934:                                    %= HALFLONG);
                    935: #endif
                    936:                        }
                    937:        } else {
                    938:                for(shift=0;;) {
                    939:                        switch(t=getc(f)) {
                    940:                        case -1:
                    941:                                return(0);
                    942:                        case '\t':
                    943:                        case ' ':
                    944:                                space++;
                    945:                                continue;
                    946:                        default:
                    947:                                if(space && !wflag) {
                    948:                                        shift += 7;
                    949:                                        space = 0;
                    950:                                }
                    951:                                sum += (long)chrtran[t] << (shift
                    952: #ifdef POW2
                    953:                                    &= HALFLONG - 1);
                    954: #else
                    955:                                    %= HALFLONG);
                    956: #endif
                    957:                                shift += 7;
                    958:                                continue;
                    959:                        case '\n':
                    960:                                break;
                    961:                        }
                    962:                        break;
                    963:                }
                    964:        }
                    965:        sum = low(sum) + high(sum);
                    966:        return((short)low(sum) + (short)high(sum));
                    967: }
                    968:
                    969: #include <a.out.h>
                    970:
                    971: asciifile(f)
                    972:        FILE *f;
                    973: {
                    974:        char buf[BUFSIZ];
                    975:        register int cnt;
                    976:        register char *cp;
                    977:
                    978:        fseek(f, (long)0, 0);
                    979:        cnt = fread(buf, 1, BUFSIZ, f);
                    980:        if (cnt >= sizeof (struct exec)) {
                    981:                struct exec hdr;
                    982:                hdr = *(struct exec *)buf;
                    983:                if (!N_BADMAG(hdr))
                    984:                        return (0);
                    985:        }
                    986:        cp = buf;
                    987:        while (--cnt >= 0)
                    988:                if (*cp++ & 0200)
                    989:                        return (0);
                    990:        return (1);
                    991: }
                    992:
                    993:
                    994: /* dump accumulated "context" diff changes */
                    995: dump_context_vec()
                    996: {
                    997:        register int    a, b, c, d;
                    998:        register char   ch;
                    999:        register struct context_vec *cvp = context_vec_start;
                   1000:        register int    lowa, upb, lowc, upd;
                   1001:        register int    do_output;
                   1002:
                   1003:        if ( cvp > context_vec_ptr )
                   1004:                return;
                   1005:
                   1006:        lowa = max(1, cvp->a - context);
                   1007:        upb  = min(len[0], context_vec_ptr->b + context);
                   1008:        lowc = max(1, cvp->c - context);
                   1009:        upd  = min(len[1], context_vec_ptr->d + context);
                   1010:
                   1011:        printf("***************\n*** ");
                   1012:        range(lowa,upb,",");
                   1013:        printf(" ****\n");
                   1014:
                   1015:        /*
                   1016:         * output changes to the "old" file.  The first loop suppresses
                   1017:         * output if there were no changes to the "old" file (we'll see
                   1018:         * the "old" lines as context in the "new" list).
                   1019:         */
                   1020:        do_output = 0;
                   1021:        for ( ; cvp <= context_vec_ptr; cvp++)
                   1022:                if (cvp->a <= cvp->b) {
                   1023:                        cvp = context_vec_start;
                   1024:                        do_output++;
                   1025:                        break;
                   1026:                }
                   1027:
                   1028:        if ( do_output ) {
                   1029:                while (cvp <= context_vec_ptr) {
                   1030:                        a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d;
                   1031:
                   1032:                        if (a <= b && c <= d)
                   1033:                                ch = 'c';
                   1034:                        else
                   1035:                                ch = (a <= b) ? 'd' : 'a';
                   1036:
                   1037:                        if (ch == 'a')
                   1038:                                fetch(ixold,lowa,b,input[0],"  ");
                   1039:                        else {
                   1040:                                fetch(ixold,lowa,a-1,input[0],"  ");
                   1041:                                fetch(ixold,a,b,input[0],ch == 'c' ? "! " : "- ");
                   1042:                        }
                   1043:                        lowa = b + 1;
                   1044:                        cvp++;
                   1045:                }
                   1046:                fetch(ixold, b+1, upb, input[0], "  ");
                   1047:        }
                   1048:
                   1049:        /* output changes to the "new" file */
                   1050:        printf("--- ");
                   1051:        range(lowc,upd,",");
                   1052:        printf(" ----\n");
                   1053:
                   1054:        do_output = 0;
                   1055:        for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
                   1056:                if (cvp->c <= cvp->d) {
                   1057:                        cvp = context_vec_start;
                   1058:                        do_output++;
                   1059:                        break;
                   1060:                }
                   1061:
                   1062:        if (do_output) {
                   1063:                while (cvp <= context_vec_ptr) {
                   1064:                        a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d;
                   1065:
                   1066:                        if (a <= b && c <= d)
                   1067:                                ch = 'c';
                   1068:                        else
                   1069:                                ch = (a <= b) ? 'd' : 'a';
                   1070:
                   1071:                        if (ch == 'd')
                   1072:                                fetch(ixnew,lowc,d,input[1],"  ");
                   1073:                        else {
                   1074:                                fetch(ixnew,lowc,c-1,input[1],"  ");
                   1075:                                fetch(ixnew,c,d,input[1],ch == 'c' ? "! " : "+ ");
                   1076:                        }
                   1077:                        lowc = d + 1;
                   1078:                        cvp++;
                   1079:                }
                   1080:                fetch(ixnew, d+1, upd, input[1], "  ");
                   1081:        }
                   1082:
                   1083:        context_vec_ptr = context_vec_start - 1;
                   1084: }