[BACK]Return to blocks.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / rsync

Annotation of src/usr.bin/rsync/blocks.c, Revision 1.22

1.22    ! claudio     1: /*     $OpenBSD: blocks.c,v 1.21 2021/11/03 14:42:12 deraadt Exp $ */
1.1       benno       2: /*
                      3:  * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
1.18      florian    17: #include <sys/queue.h>
1.1       benno      18: #include <sys/stat.h>
                     19:
                     20: #include <assert.h>
                     21: #include <endian.h>
                     22: #include <errno.h>
                     23: #include <inttypes.h>
                     24: #include <stdio.h>
                     25: #include <stdlib.h>
                     26: #include <string.h>
                     27: #include <unistd.h>
                     28:
1.6       tb         29: #include <openssl/md4.h>
                     30:
1.1       benno      31: #include "extern.h"
                     32:
1.18      florian    33: struct blkhash {
                     34:        const struct blk        *blk;
1.19      deraadt    35:        TAILQ_ENTRY(blkhash)     entries;
1.18      florian    36: };
                     37:
                     38: TAILQ_HEAD(blkhashq, blkhash);
                     39:
                     40: /*
                     41:  * Table used by the sender for looking up blocks.
                     42:  * The blocks are those sent by the receiver; we're looking up using
                     43:  * hashes computed from our local file.
                     44:  */
                     45: struct blktab {
                     46:        struct blkhashq *q; /* entries in the hashtable */
                     47:        size_t           qsz; /* size of the hashtable */
                     48:        struct blkhash  *blks; /* pre-allocated hashtable entries */
                     49: };
                     50:
                     51: /*
                     52:  * This is the number of buckets in the hashtable.
                     53:  * Use the same that GPL rsync uses.
                     54:  * (It should be dynamic?)
                     55:  */
                     56: #define        BLKTAB_SZ         65536
                     57:
                     58: /*
                     59:  * Initialise an empty hashtable with BLKTAB_SZ entries in it.
                     60:  * Populate it for each file with blkhash_set.
                     61:  * When we've processed all files, call blkhash_free.
                     62:  * Returns NULL on allocation failure.
                     63:  */
                     64: struct blktab *
                     65: blkhash_alloc(void)
                     66: {
                     67:        struct blktab   *p;
                     68:
                     69:        if ((p = calloc(1, sizeof(struct blktab))) == NULL) {
                     70:                ERR("calloc");
                     71:                return NULL;
                     72:        }
                     73:        p->qsz = BLKTAB_SZ;
                     74:        p->q = calloc(p->qsz, sizeof(struct blkhashq));
                     75:        if (p->q == NULL) {
                     76:                ERR("calloc");
                     77:                free(p);
                     78:                return NULL;
                     79:        }
                     80:        return p;
                     81: }
                     82:
                     83: /*
                     84:  * Populate the hashtable with an incoming file's blocks.
                     85:  * This will clear out any existing hashed data.
                     86:  * Returns zero on allocation failure, non-zero otherwise.
                     87:  */
                     88: int
                     89: blkhash_set(struct blktab *p, const struct blkset *bset)
                     90: {
                     91:        size_t   i, idx;
                     92:
                     93:        if (bset == NULL)
                     94:                return 1;
                     95:
                     96:        /* Wipe clean the table. */
                     97:
                     98:        for (i = 0; i < p->qsz; i++)
                     99:                TAILQ_INIT(&p->q[i]);
                    100:
                    101:        /* Fill in the hashtable. */
                    102:
1.19      deraadt   103:        p->blks = reallocarray(p->blks, bset->blksz, sizeof(struct blkhash));
1.18      florian   104:        if (p->blks == NULL) {
                    105:                ERR("reallocarray");
                    106:                return 0;
                    107:        }
                    108:        for (i = 0; i < bset->blksz; i++) {
                    109:                p->blks[i].blk = &bset->blks[i];
                    110:                idx = bset->blks[i].chksum_short % p->qsz;
                    111:                assert(idx < p->qsz);
                    112:                TAILQ_INSERT_TAIL(&p->q[idx], &p->blks[i], entries);
                    113:        }
                    114:
                    115:        return 1;
                    116: }
                    117:
                    118: /*
                    119:  * Free as allocated with blkhash_alloc().
                    120:  */
                    121: void
                    122: blkhash_free(struct blktab *p)
                    123: {
                    124:
                    125:        free(p->blks);
                    126:        free(p);
                    127: }
                    128:
1.1       benno     129: /*
                    130:  * From our current position of "offs" in buffer "buf" of total size
                    131:  * "size", see if we can find a matching block in our list of blocks.
                    132:  * The "hint" refers to the block that *might* work.
                    133:  * Returns the blk or NULL if no matching block was found.
                    134:  */
1.18      florian   135: static const struct blk *
                    136: blk_find(struct sess *sess, struct blkstat *st,
                    137:        const struct blkset *blks, const char *path, int recomp)
1.1       benno     138: {
                    139:        unsigned char    md[MD4_DIGEST_LENGTH];
                    140:        uint32_t         fhash;
                    141:        off_t            remain, osz;
                    142:        int              have_md = 0;
1.18      florian   143:        char            *map;
                    144:        const struct blkhashq *q;
                    145:        const struct blkhash *ent;
                    146:
                    147:        remain = st->mapsz - st->offs;
                    148:        assert(remain);
                    149:        osz = MINIMUM(remain, (off_t)blks->len);
1.1       benno     150:
                    151:        /*
1.18      florian   152:         * First, compute our fast hash the hard way (if we're
                    153:         * reentering this function from a previous block match, or the
                    154:         * first time) or from our existing s1 and s2 values.
1.1       benno     155:         */
                    156:
1.18      florian   157:        if (!recomp) {
                    158:                fhash = (st->s1 & 0xFFFF) | (st->s2 << 16);
                    159:        } else {
                    160:                fhash = hash_fast(st->map + st->offs, (size_t)osz);
                    161:                st->s1 = fhash & 0xFFFF;
                    162:                st->s2 = fhash >> 16;
                    163:        }
1.1       benno     164:
                    165:        /*
                    166:         * Start with our match hint.
                    167:         * This just runs the fast and slow check with the hint.
                    168:         */
                    169:
1.18      florian   170:        if (st->hint < blks->blksz &&
                    171:            fhash == blks->blks[st->hint].chksum_short &&
                    172:            (size_t)osz == blks->blks[st->hint].len) {
                    173:                hash_slow(st->map + st->offs, (size_t)osz, md, sess);
1.1       benno     174:                have_md = 1;
1.18      florian   175:                if (memcmp(md, blks->blks[st->hint].chksum_long, blks->csum) == 0) {
1.15      benno     176:                        LOG4("%s: found matching hinted match: "
1.14      deraadt   177:                            "position %jd, block %zu (position %jd, size %zu)",
                    178:                            path,
1.18      florian   179:                            (intmax_t)st->offs, blks->blks[st->hint].idx,
                    180:                            (intmax_t)blks->blks[st->hint].offs,
                    181:                            blks->blks[st->hint].len);
                    182:                        return &blks->blks[st->hint];
1.1       benno     183:                }
                    184:        }
                    185:
                    186:        /*
1.18      florian   187:         * Look for the fast hash modulus in our hashtable, filter for
                    188:         * those matching the full hash and length, then move to the
                    189:         * slow hash.
                    190:         * The slow hash is computed only once.
1.1       benno     191:         */
                    192:
1.18      florian   193:        q = &st->blktab->q[fhash % st->blktab->qsz];
                    194:
                    195:        TAILQ_FOREACH(ent, q, entries) {
                    196:                if (fhash != ent->blk->chksum_short ||
                    197:                    (size_t)osz != ent->blk->len)
1.1       benno     198:                        continue;
                    199:
1.15      benno     200:                LOG4("%s: found matching fast match: "
1.14      deraadt   201:                    "position %jd, block %zu (position %jd, size %zu)",
1.18      florian   202:                    path, (intmax_t)st->offs, ent->blk->idx,
                    203:                    (intmax_t)ent->blk->offs, ent->blk->len);
1.1       benno     204:
1.4       deraadt   205:                if (have_md == 0) {
1.18      florian   206:                        hash_slow(st->map + st->offs, (size_t)osz, md, sess);
1.1       benno     207:                        have_md = 1;
                    208:                }
                    209:
1.18      florian   210:                if (memcmp(md, ent->blk->chksum_long, blks->csum))
1.1       benno     211:                        continue;
                    212:
1.15      benno     213:                LOG4("%s: sender verifies slow match", path);
1.18      florian   214:                return ent->blk;
1.1       benno     215:        }
                    216:
1.18      florian   217:        /*
                    218:         * Adjust our partial sums for the hashing.
                    219:         * We first remove the first byte from the sum.
                    220:         * We then, if we have space, add the first byte of the next
                    221:         * block in the sequence.
                    222:         */
                    223:
                    224:        map = st->map + st->offs;
                    225:        st->s1 -= map[0];
                    226:        st->s2 -= osz * map[0];
                    227:
                    228:        if (osz < remain) {
                    229:                st->s1 += map[osz];
                    230:                st->s2 += st->s1;
1.19      deraadt   231:        }
1.18      florian   232:
1.1       benno     233:        return NULL;
                    234: }
                    235:
                    236: /*
                    237:  * Given a local file "path" and the blocks created by a remote machine,
                    238:  * find out which blocks of our file they don't have and send them.
1.7       florian   239:  * This function is reentrant: it must be called while there's still
                    240:  * data to send.
1.1       benno     241:  */
1.9       florian   242: void
                    243: blk_match(struct sess *sess, const struct blkset *blks,
1.7       florian   244:        const char *path, struct blkstat *st)
1.1       benno     245: {
1.18      florian   246:        off_t             last, end, sz;
                    247:        int32_t           tok;
                    248:        size_t            i;
                    249:        const struct blk *blk;
1.1       benno     250:
                    251:        /*
                    252:         * If the file's empty or we don't have any blocks from the
                    253:         * sender, then simply send the whole file.
                    254:         * Otherwise, run the hash matching routine and send raw chunks
                    255:         * and subsequent matching tokens.
                    256:         */
                    257:
1.7       florian   258:        if (st->mapsz && blks->blksz) {
1.9       florian   259:                /*
                    260:                 * Stop searching at the length of the file minus the
                    261:                 * size of the last block.
                    262:                 * The reason for this being that we don't need to do an
                    263:                 * incremental hash within the last block---if it
                    264:                 * doesn't match, it doesn't match.
                    265:                 */
                    266:
                    267:                end = st->mapsz + 1 - blks->blks[blks->blksz - 1].len;
                    268:                last = st->offs;
                    269:
1.18      florian   270:                for (i = 0; st->offs < end; st->offs++, i++) {
                    271:                        blk = blk_find(sess, st, blks, path, i == 0);
1.9       florian   272:                        if (blk == NULL)
                    273:                                continue;
                    274:
                    275:                        sz = st->offs - last;
                    276:                        st->dirty += sz;
                    277:                        st->total += sz;
1.15      benno     278:                        LOG4("%s: flushing %jd B before %zu B block %zu",
1.14      deraadt   279:                            path, (intmax_t)sz,
                    280:                            blk->len, blk->idx);
1.9       florian   281:                        tok = -(blk->idx + 1);
                    282:
1.22    ! claudio   283:                        hash_file_buf(&st->ctx, st->map + last, sz + blk->len);
        !           284:
1.9       florian   285:                        /*
                    286:                         * Write the data we have, then follow it with
                    287:                         * the tag of the block that matches.
                    288:                         */
                    289:
                    290:                        st->curpos = last;
                    291:                        st->curlen = st->curpos + sz;
                    292:                        st->curtok = tok;
1.10      deraadt   293:                        assert(st->curtok != 0);
1.9       florian   294:                        st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
                    295:                        st->total += blk->len;
                    296:                        st->offs += blk->len;
                    297:                        st->hint = blk->idx + 1;
1.22    ! claudio   298:
1.9       florian   299:                        return;
1.1       benno     300:                }
                    301:
1.9       florian   302:                /* Emit remaining data and send terminator token. */
                    303:
                    304:                sz = st->mapsz - last;
1.15      benno     305:                LOG4("%s: flushing remaining %jd B",
1.14      deraadt   306:                    path, (intmax_t)sz);
1.1       benno     307:
1.9       florian   308:                st->total += sz;
                    309:                st->dirty += sz;
                    310:                st->curpos = last;
                    311:                st->curlen = st->curpos + sz;
                    312:                st->curtok = 0;
                    313:                st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
1.22    ! claudio   314:
        !           315:                hash_file_buf(&st->ctx, st->map + st->curpos, sz);
1.9       florian   316:        } else {
                    317:                st->curpos = 0;
                    318:                st->curlen = st->mapsz;
                    319:                st->curtok = 0;
                    320:                st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK;
                    321:                st->dirty = st->total = st->mapsz;
1.22    ! claudio   322:
        !           323:                hash_file_buf(&st->ctx, st->map, st->mapsz);
1.1       benno     324:
1.15      benno     325:                LOG4("%s: flushing whole file %zu B",
1.14      deraadt   326:                    path, st->mapsz);
1.1       benno     327:        }
                    328: }
                    329:
                    330: /*
1.13      benno     331:  * Buffer the message from sender to the receiver indicating that the
                    332:  * block set has been received.
1.1       benno     333:  * Symmetrises blk_send_ack().
                    334:  */
1.13      benno     335: void
1.16      benno     336: blk_recv_ack(char buf[20], const struct blkset *blocks, int32_t idx)
1.1       benno     337: {
1.8       florian   338:        size_t   pos = 0, sz;
1.1       benno     339:
1.8       florian   340:        sz = sizeof(int32_t) + /* index */
1.21      deraadt   341:            sizeof(int32_t) + /* block count */
                    342:            sizeof(int32_t) + /* block length */
                    343:            sizeof(int32_t) + /* checksum length */
                    344:            sizeof(int32_t); /* block remainder */
1.13      benno     345:        assert(sz == 20);
1.8       florian   346:
1.16      benno     347:        io_buffer_int(buf, &pos, sz, idx);
                    348:        io_buffer_int(buf, &pos, sz, blocks->blksz);
                    349:        io_buffer_int(buf, &pos, sz, blocks->len);
                    350:        io_buffer_int(buf, &pos, sz, blocks->csum);
                    351:        io_buffer_int(buf, &pos, sz, blocks->rem);
1.8       florian   352:        assert(pos == sz);
1.1       benno     353: }
                    354:
                    355: /*
                    356:  * Read all of the checksums for a file's blocks.
                    357:  * Returns the set of blocks or NULL on failure.
                    358:  */
                    359: struct blkset *
                    360: blk_recv(struct sess *sess, int fd, const char *path)
                    361: {
                    362:        struct blkset   *s;
                    363:        int32_t          i;
                    364:        size_t           j;
                    365:        struct blk      *b;
                    366:        off_t            offs = 0;
                    367:
1.4       deraadt   368:        if ((s = calloc(1, sizeof(struct blkset))) == NULL) {
1.15      benno     369:                ERR("calloc");
1.1       benno     370:                return NULL;
                    371:        }
                    372:
                    373:        /*
                    374:         * The block prologue consists of a few values that we'll need
                    375:         * in reading the individual blocks for this file.
                    376:         * FIXME: read into buffer and unbuffer.
                    377:         */
                    378:
1.3       deraadt   379:        if (!io_read_size(sess, fd, &s->blksz)) {
1.15      benno     380:                ERRX1("io_read_size");
1.1       benno     381:                goto out;
1.3       deraadt   382:        } else if (!io_read_size(sess, fd, &s->len)) {
1.15      benno     383:                ERRX1("io_read_size");
1.1       benno     384:                goto out;
1.3       deraadt   385:        } else if (!io_read_size(sess, fd, &s->csum)) {
1.15      benno     386:                ERRX1("io_read_int");
1.1       benno     387:                goto out;
1.3       deraadt   388:        } else if (!io_read_size(sess, fd, &s->rem)) {
1.15      benno     389:                ERRX1("io_read_int");
1.1       benno     390:                goto out;
                    391:        } else if (s->rem && s->rem >= s->len) {
1.15      benno     392:                ERRX("block remainder is "
1.1       benno     393:                        "greater than block size");
                    394:                goto out;
                    395:        }
                    396:
1.15      benno     397:        LOG3("%s: read block prologue: %zu blocks of "
1.14      deraadt   398:            "%zu B, %zu B remainder, %zu B checksum", path,
                    399:            s->blksz, s->len, s->rem, s->csum);
1.1       benno     400:
                    401:        if (s->blksz) {
                    402:                s->blks = calloc(s->blksz, sizeof(struct blk));
1.4       deraadt   403:                if (s->blks == NULL) {
1.15      benno     404:                        ERR("calloc");
1.1       benno     405:                        goto out;
                    406:                }
                    407:        }
                    408:
1.2       benno     409:        /*
1.1       benno     410:         * Read each block individually.
                    411:         * FIXME: read buffer and unbuffer.
                    412:         */
                    413:
                    414:        for (j = 0; j < s->blksz; j++) {
                    415:                b = &s->blks[j];
1.3       deraadt   416:                if (!io_read_int(sess, fd, &i)) {
1.15      benno     417:                        ERRX1("io_read_int");
1.1       benno     418:                        goto out;
                    419:                }
                    420:                b->chksum_short = i;
                    421:
                    422:                assert(s->csum <= sizeof(b->chksum_long));
1.3       deraadt   423:                if (!io_read_buf(sess,
1.1       benno     424:                    fd, b->chksum_long, s->csum)) {
1.15      benno     425:                        ERRX1("io_read_buf");
1.1       benno     426:                        goto out;
                    427:                }
                    428:
                    429:                /*
                    430:                 * If we're the last block, then we're assigned the
                    431:                 * remainder of the data.
                    432:                 */
                    433:
                    434:                b->offs = offs;
                    435:                b->idx = j;
                    436:                b->len = (j == (s->blksz - 1) && s->rem) ?
                    437:                        s->rem : s->len;
                    438:                offs += b->len;
                    439:
1.15      benno     440:                LOG4("%s: read block %zu, length %zu B",
1.14      deraadt   441:                    path, b->idx, b->len);
1.1       benno     442:        }
                    443:
                    444:        s->size = offs;
1.15      benno     445:        LOG3("%s: read blocks: %zu blocks, %jd B total blocked data",
1.14      deraadt   446:            path, s->blksz, (intmax_t)s->size);
1.1       benno     447:        return s;
                    448: out:
1.7       florian   449:        free(s->blks);
                    450:        free(s);
1.1       benno     451:        return NULL;
                    452: }
                    453:
                    454: /*
                    455:  * Symmetrise blk_recv_ack(), except w/o the leading identifier.
                    456:  * Return zero on failure, non-zero on success.
                    457:  */
                    458: int
                    459: blk_send_ack(struct sess *sess, int fd, struct blkset *p)
                    460: {
                    461:        char     buf[16];
                    462:        size_t   pos = 0, sz;
                    463:
                    464:        /* Put the entire send routine into a buffer. */
                    465:
                    466:        sz = sizeof(int32_t) + /* block count */
1.21      deraadt   467:            sizeof(int32_t) + /* block length */
                    468:            sizeof(int32_t) + /* checksum length */
                    469:            sizeof(int32_t); /* block remainder */
1.1       benno     470:        assert(sz <= sizeof(buf));
                    471:
1.3       deraadt   472:        if (!io_read_buf(sess, fd, buf, sz)) {
1.15      benno     473:                ERRX1("io_read_buf");
1.1       benno     474:                return 0;
                    475:        }
                    476:
1.16      benno     477:        if (!io_unbuffer_size(buf, &pos, sz, &p->blksz))
1.15      benno     478:                ERRX1("io_unbuffer_size");
1.16      benno     479:        else if (!io_unbuffer_size(buf, &pos, sz, &p->len))
1.15      benno     480:                ERRX1("io_unbuffer_size");
1.16      benno     481:        else if (!io_unbuffer_size(buf, &pos, sz, &p->csum))
1.15      benno     482:                ERRX1("io_unbuffer_size");
1.16      benno     483:        else if (!io_unbuffer_size(buf, &pos, sz, &p->rem))
1.15      benno     484:                ERRX1("io_unbuffer_size");
1.1       benno     485:        else if (p->len && p->rem >= p->len)
1.15      benno     486:                ERRX1("non-zero length is less than remainder");
1.4       deraadt   487:        else if (p->csum == 0 || p->csum > 16)
1.15      benno     488:                ERRX1("inappropriate checksum length");
1.1       benno     489:        else
                    490:                return 1;
                    491:
                    492:        return 0;
                    493: }
                    494:
                    495: /*
                    496:  * Transmit the metadata for set and blocks.
                    497:  * Return zero on failure, non-zero on success.
                    498:  */
                    499: int
                    500: blk_send(struct sess *sess, int fd, size_t idx,
                    501:        const struct blkset *p, const char *path)
                    502: {
                    503:        char    *buf;
                    504:        size_t   i, pos = 0, sz;
                    505:        int      rc = 0;
                    506:
                    507:        /* Put the entire send routine into a buffer. */
                    508:
                    509:        sz = sizeof(int32_t) + /* identifier */
1.2       benno     510:            sizeof(int32_t) + /* block count */
                    511:            sizeof(int32_t) + /* block length */
                    512:            sizeof(int32_t) + /* checksum length */
                    513:            sizeof(int32_t) + /* block remainder */
                    514:            p->blksz *
                    515:            (sizeof(int32_t) + /* short checksum */
                    516:                p->csum); /* long checksum */
1.1       benno     517:
1.4       deraadt   518:        if ((buf = malloc(sz)) == NULL) {
1.15      benno     519:                ERR("malloc");
1.1       benno     520:                return 0;
                    521:        }
                    522:
1.16      benno     523:        io_buffer_int(buf, &pos, sz, idx);
                    524:        io_buffer_int(buf, &pos, sz, p->blksz);
                    525:        io_buffer_int(buf, &pos, sz, p->len);
                    526:        io_buffer_int(buf, &pos, sz, p->csum);
                    527:        io_buffer_int(buf, &pos, sz, p->rem);
1.1       benno     528:
                    529:        for (i = 0; i < p->blksz; i++) {
1.19      deraadt   530:                io_buffer_int(buf, &pos, sz, p->blks[i].chksum_short);
1.16      benno     531:                io_buffer_buf(buf, &pos, sz, p->blks[i].chksum_long, p->csum);
1.1       benno     532:        }
                    533:
                    534:        assert(pos == sz);
                    535:
1.3       deraadt   536:        if (!io_write_buf(sess, fd, buf, sz)) {
1.15      benno     537:                ERRX1("io_write_buf");
1.1       benno     538:                goto out;
                    539:        }
                    540:
1.15      benno     541:        LOG3("%s: sent block prologue: %zu blocks of %zu B, "
1.14      deraadt   542:            "%zu B remainder, %zu B checksum",
                    543:            path, p->blksz, p->len, p->rem, p->csum);
1.1       benno     544:        rc = 1;
                    545: out:
                    546:        free(buf);
                    547:        return rc;
                    548: }