[BACK]Return to blocks.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / rsync

Annotation of src/usr.bin/rsync/blocks.c, Revision 1.7

1.7     ! florian     1: /*     $Id: blocks.c,v 1.6 2019/02/13 05:41:35 tb Exp $ */
1.1       benno       2: /*
                      3:  * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17: #include <sys/stat.h>
                     18:
                     19: #include <assert.h>
                     20: #include <endian.h>
                     21: #include <errno.h>
                     22: #include <inttypes.h>
                     23: #include <stdio.h>
                     24: #include <stdlib.h>
                     25: #include <string.h>
                     26: #include <unistd.h>
                     27:
1.6       tb         28: #include <openssl/md4.h>
                     29:
1.1       benno      30: #include "extern.h"
                     31:
                     32: /*
                     33:  * Flush out "size" bytes of the buffer, doing all of the appropriate
                     34:  * chunking of the data, then the subsequent token (or zero).
                     35:  * Return zero on failure, non-zero on success.
                     36:  */
                     37: static int
                     38: blk_flush(struct sess *sess, int fd,
                     39:        const void *b, off_t size, int32_t token)
                     40: {
                     41:        off_t   i = 0, sz;
                     42:
                     43:        while (i < size) {
                     44:                sz = MAX_CHUNK < (size - i) ?
                     45:                        MAX_CHUNK : (size - i);
1.3       deraadt    46:                if (!io_write_int(sess, fd, sz)) {
1.1       benno      47:                        ERRX1(sess, "io_write_int");
                     48:                        return 0;
1.3       deraadt    49:                } else if (!io_write_buf(sess, fd, b + i, sz)) {
1.1       benno      50:                        ERRX1(sess, "io_write_buf");
                     51:                        return 0;
                     52:                }
                     53:                i += sz;
                     54:        }
                     55:
1.3       deraadt    56:        if (!io_write_int(sess, fd, token)) {
1.1       benno      57:                ERRX1(sess, "io_write_int");
                     58:                return 0;
                     59:        }
                     60:
                     61:        return 1;
                     62: }
                     63:
                     64: /*
                     65:  * From our current position of "offs" in buffer "buf" of total size
                     66:  * "size", see if we can find a matching block in our list of blocks.
                     67:  * The "hint" refers to the block that *might* work.
                     68:  * Returns the blk or NULL if no matching block was found.
                     69:  */
                     70: static struct blk *
                     71: blk_find(struct sess *sess, const void *buf, off_t size, off_t offs,
                     72:        const struct blkset *blks, const char *path, size_t hint)
                     73: {
                     74:        unsigned char    md[MD4_DIGEST_LENGTH];
                     75:        uint32_t         fhash;
                     76:        off_t            remain, osz;
                     77:        size_t           i;
                     78:        int              have_md = 0;
                     79:
                     80:        /*
                     81:         * First, compute our fast hash.
                     82:         * FIXME: yes, this can be a rolling computation, but I'm
                     83:         * deliberately making it simple first.
                     84:         */
                     85:
                     86:        remain = size - offs;
                     87:        assert(remain);
                     88:        osz = remain < (off_t)blks->len ? remain : (off_t)blks->len;
                     89:        fhash = hash_fast(buf + offs, (size_t)osz);
                     90:        have_md = 0;
                     91:
                     92:        /*
                     93:         * Start with our match hint.
                     94:         * This just runs the fast and slow check with the hint.
                     95:         */
                     96:
                     97:        if (hint < blks->blksz &&
                     98:            fhash == blks->blks[hint].chksum_short &&
                     99:            (size_t)osz == blks->blks[hint].len) {
                    100:                hash_slow(buf + offs, (size_t)osz, md, sess);
                    101:                have_md = 1;
1.4       deraadt   102:                if (memcmp(md, blks->blks[hint].chksum_long, blks->csum) == 0) {
1.1       benno     103:                        LOG4(sess, "%s: found matching hinted match: "
                    104:                                "position %jd, block %zu "
                    105:                                "(position %jd, size %zu)", path,
                    106:                                (intmax_t)offs, blks->blks[hint].idx,
                    107:                                (intmax_t)blks->blks[hint].offs,
                    108:                                blks->blks[hint].len);
                    109:                        return &blks->blks[hint];
                    110:                }
                    111:        }
                    112:
                    113:        /*
                    114:         * Now loop and look for the fast hash.
                    115:         * If it's found, move on to the slow hash.
                    116:         */
                    117:
                    118:        for (i = 0; i < blks->blksz; i++) {
                    119:                if (fhash != blks->blks[i].chksum_short)
                    120:                        continue;
                    121:                if ((size_t)osz != blks->blks[i].len)
                    122:                        continue;
                    123:
                    124:                LOG4(sess, "%s: found matching fast match: "
                    125:                        "position %jd, block %zu "
                    126:                        "(position %jd, size %zu)", path,
                    127:                        (intmax_t)offs, blks->blks[i].idx,
                    128:                        (intmax_t)blks->blks[i].offs,
                    129:                        blks->blks[i].len);
                    130:
                    131:                /* Compute slow hash on demand. */
                    132:
1.4       deraadt   133:                if (have_md == 0) {
1.1       benno     134:                        hash_slow(buf + offs, (size_t)osz, md, sess);
                    135:                        have_md = 1;
                    136:                }
                    137:
                    138:                if (memcmp(md, blks->blks[i].chksum_long, blks->csum))
                    139:                        continue;
                    140:
                    141:                LOG4(sess, "%s: sender verifies slow match", path);
                    142:                return &blks->blks[i];
                    143:        }
                    144:
                    145:        return NULL;
                    146: }
                    147:
                    148: /*
                    149:  * The main reconstruction algorithm on the sender side.
1.7     ! florian   150:  * This is reentrant: it's meant to be called whenever "fd" unblocks for
        !           151:  * writing by the sender.
1.1       benno     152:  * Scans byte-wise over the input file, looking for matching blocks in
                    153:  * what the server sent us.
                    154:  * If a block is found, emit all data up until the block, then the token
                    155:  * for the block.
                    156:  * The receiving end can then reconstruct the file trivially.
                    157:  * Return zero on failure, non-zero on success.
                    158:  */
                    159: static int
                    160: blk_match_send(struct sess *sess, const char *path, int fd,
1.7     ! florian   161:        const struct blkset *blks, struct blkstat *st)
1.1       benno     162: {
1.7     ! florian   163:        off_t            last, end, sz;
1.1       benno     164:        int32_t          tok;
                    165:        struct blk      *blk;
                    166:
                    167:        /*
                    168:         * Stop searching at the length of the file minus the size of
                    169:         * the last block.
                    170:         * The reason for this being that we don't need to do an
                    171:         * incremental hash within the last block---if it doesn't match,
                    172:         * it doesn't match.
                    173:         */
                    174:
1.7     ! florian   175:        end = st->mapsz + 1 - blks->blks[blks->blksz - 1].len;
        !           176:        last = st->offs;
1.1       benno     177:
1.7     ! florian   178:        for ( ; st->offs < end; st->offs++) {
        !           179:                blk = blk_find(sess, st->map, st->mapsz,
        !           180:                        st->offs, blks, path, st->hint);
1.4       deraadt   181:                if (blk == NULL)
1.1       benno     182:                        continue;
                    183:
1.7     ! florian   184:                sz = st->offs - last;
        !           185:                st->dirty += sz;
        !           186:                st->total += sz;
1.1       benno     187:                LOG4(sess, "%s: flushing %jd B before %zu B "
1.7     ! florian   188:                        "block %zu", path, (intmax_t)sz, blk->len, blk->idx);
1.1       benno     189:                tok = -(blk->idx + 1);
                    190:
                    191:                /*
                    192:                 * Write the data we have, then follow it with the tag
                    193:                 * of the block that matches.
                    194:                 * The receiver will then write our data, then the data
                    195:                 * it already has in the matching block.
                    196:                 */
                    197:
1.7     ! florian   198:                if (!blk_flush(sess, fd, st->map + last, sz, tok)) {
1.1       benno     199:                        ERRX1(sess, "blk_flush");
1.7     ! florian   200:                        return -1;
1.1       benno     201:                }
                    202:
1.7     ! florian   203:                st->total += blk->len;
        !           204:                st->offs += blk->len;
        !           205:                st->hint = blk->idx + 1;
        !           206:                return 0;
1.1       benno     207:        }
                    208:
                    209:        /* Emit remaining data and send terminator token. */
                    210:
1.7     ! florian   211:        sz = st->mapsz - last;
        !           212:        st->total += sz;
        !           213:        st->dirty += sz;
1.1       benno     214:
                    215:        LOG4(sess, "%s: flushing remaining %jd B", path, (intmax_t)sz);
                    216:
1.7     ! florian   217:        if (!blk_flush(sess, fd, st->map + last, sz, 0)) {
1.1       benno     218:                ERRX1(sess, "blk_flush");
1.7     ! florian   219:                return -1;
1.1       benno     220:        }
                    221:
                    222:        LOG3(sess, "%s: flushed (chunked) %jd B total, "
1.7     ! florian   223:                "%.2f%% upload ratio", path, (intmax_t)st->total,
        !           224:                100.0 * st->dirty / st->total);
1.1       benno     225:        return 1;
                    226: }
                    227:
                    228: /*
                    229:  * Given a local file "path" and the blocks created by a remote machine,
                    230:  * find out which blocks of our file they don't have and send them.
1.7     ! florian   231:  * This function is reentrant: it must be called while there's still
        !           232:  * data to send.
        !           233:  * Return 0 if there's more data to send, >0 if the file has completed
        !           234:  * its update, or <0 on error.
1.1       benno     235:  */
                    236: int
1.7     ! florian   237: blk_match(struct sess *sess, int fd, const struct blkset *blks,
        !           238:        const char *path, struct blkstat *st)
1.1       benno     239: {
                    240:        unsigned char    filemd[MD4_DIGEST_LENGTH];
1.7     ! florian   241:        int              c;
1.1       benno     242:
                    243:        /*
                    244:         * If the file's empty or we don't have any blocks from the
                    245:         * sender, then simply send the whole file.
                    246:         * Otherwise, run the hash matching routine and send raw chunks
                    247:         * and subsequent matching tokens.
                    248:         */
                    249:
1.7     ! florian   250:        if (st->mapsz && blks->blksz) {
        !           251:                if ((c = blk_match_send(sess, path, fd, blks, st)) < 0) {
1.1       benno     252:                        ERRX1(sess, "blk_match_send");
1.7     ! florian   253:                        return -1;
        !           254:                } else if (c == 0)
        !           255:                        return 0;
1.1       benno     256:        } else {
1.7     ! florian   257:                if (!blk_flush(sess, fd, st->map, st->mapsz, 0)) {
1.1       benno     258:                        ERRX1(sess, "blk_flush");
1.7     ! florian   259:                        return -1;
1.1       benno     260:                }
1.5       benno     261:                LOG3(sess, "%s: flushed (un-chunked) %jd B, 100%% upload ratio",
1.7     ! florian   262:                    path, (intmax_t)st->mapsz);
1.1       benno     263:        }
                    264:
1.2       benno     265:        /*
1.1       benno     266:         * Now write the full file hash.
                    267:         * Since we're seeding the hash, this always gives us some sort
                    268:         * of data even if the file's zero-length.
                    269:         */
                    270:
1.7     ! florian   271:        hash_file(st->map, st->mapsz, filemd, sess);
1.1       benno     272:
1.3       deraadt   273:        if (!io_write_buf(sess, fd, filemd, MD4_DIGEST_LENGTH)) {
1.1       benno     274:                ERRX1(sess, "io_write_buf");
1.7     ! florian   275:                return -1;
1.1       benno     276:        }
                    277:
1.7     ! florian   278:        return 1;
1.1       benno     279: }
                    280:
                    281: /*
                    282:  * Sent from the sender to the receiver to indicate that the block set
                    283:  * has been received.
                    284:  * Symmetrises blk_send_ack().
                    285:  * Returns zero on failure, non-zero on success.
                    286:  */
                    287: int
                    288: blk_recv_ack(struct sess *sess,
                    289:        int fd, const struct blkset *blocks, int32_t idx)
                    290: {
                    291:
                    292:        /* FIXME: put into static block. */
                    293:
1.3       deraadt   294:        if (!io_write_int(sess, fd, idx))
1.1       benno     295:                ERRX1(sess, "io_write_int");
1.3       deraadt   296:        else if (!io_write_int(sess, fd, blocks->blksz))
1.1       benno     297:                ERRX1(sess, "io_write_int");
1.3       deraadt   298:        else if (!io_write_int(sess, fd, blocks->len))
1.1       benno     299:                ERRX1(sess, "io_write_int");
1.3       deraadt   300:        else if (!io_write_int(sess, fd, blocks->csum))
1.1       benno     301:                ERRX1(sess, "io_write_int");
1.3       deraadt   302:        else if (!io_write_int(sess, fd, blocks->rem))
1.1       benno     303:                ERRX1(sess, "io_write_int");
                    304:        else
                    305:                return 1;
                    306:
                    307:        return 0;
                    308: }
                    309:
                    310: /*
                    311:  * Read all of the checksums for a file's blocks.
                    312:  * Returns the set of blocks or NULL on failure.
                    313:  */
                    314: struct blkset *
                    315: blk_recv(struct sess *sess, int fd, const char *path)
                    316: {
                    317:        struct blkset   *s;
                    318:        int32_t          i;
                    319:        size_t           j;
                    320:        struct blk      *b;
                    321:        off_t            offs = 0;
                    322:
1.4       deraadt   323:        if ((s = calloc(1, sizeof(struct blkset))) == NULL) {
1.1       benno     324:                ERR(sess, "calloc");
                    325:                return NULL;
                    326:        }
                    327:
                    328:        /*
                    329:         * The block prologue consists of a few values that we'll need
                    330:         * in reading the individual blocks for this file.
                    331:         * FIXME: read into buffer and unbuffer.
                    332:         */
                    333:
1.3       deraadt   334:        if (!io_read_size(sess, fd, &s->blksz)) {
1.1       benno     335:                ERRX1(sess, "io_read_size");
                    336:                goto out;
1.3       deraadt   337:        } else if (!io_read_size(sess, fd, &s->len)) {
1.1       benno     338:                ERRX1(sess, "io_read_size");
                    339:                goto out;
1.3       deraadt   340:        } else if (!io_read_size(sess, fd, &s->csum)) {
1.1       benno     341:                ERRX1(sess, "io_read_int");
                    342:                goto out;
1.3       deraadt   343:        } else if (!io_read_size(sess, fd, &s->rem)) {
1.1       benno     344:                ERRX1(sess, "io_read_int");
                    345:                goto out;
                    346:        } else if (s->rem && s->rem >= s->len) {
                    347:                ERRX(sess, "block remainder is "
                    348:                        "greater than block size");
                    349:                goto out;
                    350:        }
                    351:
                    352:        LOG3(sess, "%s: read block prologue: %zu blocks of "
                    353:                "%zu B, %zu B remainder, %zu B checksum", path,
                    354:                s->blksz, s->len, s->rem, s->csum);
                    355:
                    356:        if (s->blksz) {
                    357:                s->blks = calloc(s->blksz, sizeof(struct blk));
1.4       deraadt   358:                if (s->blks == NULL) {
1.1       benno     359:                        ERR(sess, "calloc");
                    360:                        goto out;
                    361:                }
                    362:        }
                    363:
1.2       benno     364:        /*
1.1       benno     365:         * Read each block individually.
                    366:         * FIXME: read buffer and unbuffer.
                    367:         */
                    368:
                    369:        for (j = 0; j < s->blksz; j++) {
                    370:                b = &s->blks[j];
1.3       deraadt   371:                if (!io_read_int(sess, fd, &i)) {
1.1       benno     372:                        ERRX1(sess, "io_read_int");
                    373:                        goto out;
                    374:                }
                    375:                b->chksum_short = i;
                    376:
                    377:                assert(s->csum <= sizeof(b->chksum_long));
1.3       deraadt   378:                if (!io_read_buf(sess,
1.1       benno     379:                    fd, b->chksum_long, s->csum)) {
                    380:                        ERRX1(sess, "io_read_buf");
                    381:                        goto out;
                    382:                }
                    383:
                    384:                /*
                    385:                 * If we're the last block, then we're assigned the
                    386:                 * remainder of the data.
                    387:                 */
                    388:
                    389:                b->offs = offs;
                    390:                b->idx = j;
                    391:                b->len = (j == (s->blksz - 1) && s->rem) ?
                    392:                        s->rem : s->len;
                    393:                offs += b->len;
                    394:
                    395:                LOG4(sess, "%s: read block %zu, "
                    396:                        "length %zu B", path, b->idx, b->len);
                    397:        }
                    398:
                    399:        s->size = offs;
                    400:        LOG3(sess, "%s: read blocks: %zu blocks, %jd B total "
                    401:                "blocked data", path, s->blksz, (intmax_t)s->size);
                    402:        return s;
                    403: out:
1.7     ! florian   404:        free(s->blks);
        !           405:        free(s);
1.1       benno     406:        return NULL;
                    407: }
                    408:
                    409: /*
                    410:  * Symmetrise blk_recv_ack(), except w/o the leading identifier.
                    411:  * Return zero on failure, non-zero on success.
                    412:  */
                    413: int
                    414: blk_send_ack(struct sess *sess, int fd, struct blkset *p)
                    415: {
                    416:        char     buf[16];
                    417:        size_t   pos = 0, sz;
                    418:
                    419:        /* Put the entire send routine into a buffer. */
                    420:
                    421:        sz = sizeof(int32_t) + /* block count */
                    422:             sizeof(int32_t) + /* block length */
                    423:             sizeof(int32_t) + /* checksum length */
                    424:             sizeof(int32_t); /* block remainder */
                    425:        assert(sz <= sizeof(buf));
                    426:
1.3       deraadt   427:        if (!io_read_buf(sess, fd, buf, sz)) {
1.1       benno     428:                ERRX1(sess, "io_read_buf");
                    429:                return 0;
                    430:        }
                    431:
1.3       deraadt   432:        if (!io_unbuffer_size(sess, buf, &pos, sz, &p->blksz))
1.1       benno     433:                ERRX1(sess, "io_unbuffer_size");
1.3       deraadt   434:        else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->len))
1.1       benno     435:                ERRX1(sess, "io_unbuffer_size");
1.3       deraadt   436:        else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->csum))
1.1       benno     437:                ERRX1(sess, "io_unbuffer_size");
1.3       deraadt   438:        else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->rem))
1.1       benno     439:                ERRX1(sess, "io_unbuffer_size");
                    440:        else if (p->len && p->rem >= p->len)
                    441:                ERRX1(sess, "non-zero length is less than remainder");
1.4       deraadt   442:        else if (p->csum == 0 || p->csum > 16)
1.1       benno     443:                ERRX1(sess, "inappropriate checksum length");
                    444:        else
                    445:                return 1;
                    446:
                    447:        return 0;
                    448: }
                    449:
                    450: /*
                    451:  * Transmit the metadata for set and blocks.
                    452:  * Return zero on failure, non-zero on success.
                    453:  */
                    454: int
                    455: blk_send(struct sess *sess, int fd, size_t idx,
                    456:        const struct blkset *p, const char *path)
                    457: {
                    458:        char    *buf;
                    459:        size_t   i, pos = 0, sz;
                    460:        int      rc = 0;
                    461:
                    462:        /* Put the entire send routine into a buffer. */
                    463:
                    464:        sz = sizeof(int32_t) + /* identifier */
1.2       benno     465:            sizeof(int32_t) + /* block count */
                    466:            sizeof(int32_t) + /* block length */
                    467:            sizeof(int32_t) + /* checksum length */
                    468:            sizeof(int32_t) + /* block remainder */
                    469:            p->blksz *
                    470:            (sizeof(int32_t) + /* short checksum */
                    471:                p->csum); /* long checksum */
1.1       benno     472:
1.4       deraadt   473:        if ((buf = malloc(sz)) == NULL) {
1.1       benno     474:                ERR(sess, "malloc");
                    475:                return 0;
                    476:        }
                    477:
                    478:        io_buffer_int(sess, buf, &pos, sz, idx);
                    479:        io_buffer_int(sess, buf, &pos, sz, p->blksz);
                    480:        io_buffer_int(sess, buf, &pos, sz, p->len);
                    481:        io_buffer_int(sess, buf, &pos, sz, p->csum);
                    482:        io_buffer_int(sess, buf, &pos, sz, p->rem);
                    483:
                    484:        for (i = 0; i < p->blksz; i++) {
1.2       benno     485:                io_buffer_int(sess, buf, &pos,
1.1       benno     486:                        sz, p->blks[i].chksum_short);
1.2       benno     487:                io_buffer_buf(sess, buf, &pos, sz,
1.1       benno     488:                        p->blks[i].chksum_long, p->csum);
                    489:        }
                    490:
                    491:        assert(pos == sz);
                    492:
1.3       deraadt   493:        if (!io_write_buf(sess, fd, buf, sz)) {
1.1       benno     494:                ERRX1(sess, "io_write_buf");
                    495:                goto out;
                    496:        }
                    497:
                    498:        LOG3(sess, "%s: sent block prologue: %zu blocks of %zu B, "
                    499:                "%zu B remainder, %zu B checksum", path,
                    500:                p->blksz, p->len, p->rem, p->csum);
                    501:        rc = 1;
                    502: out:
                    503:        free(buf);
                    504:        return rc;
                    505: }