[BACK]Return to blocks.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / rsync

Annotation of src/usr.bin/rsync/blocks.c, Revision 1.15

1.15    ! benno       1: /*     $Id: blocks.c,v 1.14 2019/03/23 16:04:28 deraadt Exp $ */
1.1       benno       2: /*
                      3:  * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
                      4:  *
                      5:  * Permission to use, copy, modify, and distribute this software for any
                      6:  * purpose with or without fee is hereby granted, provided that the above
                      7:  * copyright notice and this permission notice appear in all copies.
                      8:  *
                      9:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     10:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     11:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     12:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     13:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     14:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     15:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
                     16:  */
                     17: #include <sys/stat.h>
                     18:
                     19: #include <assert.h>
                     20: #include <endian.h>
                     21: #include <errno.h>
                     22: #include <inttypes.h>
                     23: #include <stdio.h>
                     24: #include <stdlib.h>
                     25: #include <string.h>
                     26: #include <unistd.h>
                     27:
1.6       tb         28: #include <openssl/md4.h>
                     29:
1.1       benno      30: #include "extern.h"
                     31:
                     32: /*
                     33:  * From our current position of "offs" in buffer "buf" of total size
                     34:  * "size", see if we can find a matching block in our list of blocks.
                     35:  * The "hint" refers to the block that *might* work.
                     36:  * Returns the blk or NULL if no matching block was found.
                     37:  */
                     38: static struct blk *
                     39: blk_find(struct sess *sess, const void *buf, off_t size, off_t offs,
                     40:        const struct blkset *blks, const char *path, size_t hint)
                     41: {
                     42:        unsigned char    md[MD4_DIGEST_LENGTH];
                     43:        uint32_t         fhash;
                     44:        off_t            remain, osz;
                     45:        size_t           i;
                     46:        int              have_md = 0;
                     47:
                     48:        /*
                     49:         * First, compute our fast hash.
                     50:         * FIXME: yes, this can be a rolling computation, but I'm
                     51:         * deliberately making it simple first.
                     52:         */
                     53:
                     54:        remain = size - offs;
                     55:        assert(remain);
                     56:        osz = remain < (off_t)blks->len ? remain : (off_t)blks->len;
                     57:        fhash = hash_fast(buf + offs, (size_t)osz);
                     58:        have_md = 0;
                     59:
                     60:        /*
                     61:         * Start with our match hint.
                     62:         * This just runs the fast and slow check with the hint.
                     63:         */
                     64:
                     65:        if (hint < blks->blksz &&
                     66:            fhash == blks->blks[hint].chksum_short &&
                     67:            (size_t)osz == blks->blks[hint].len) {
                     68:                hash_slow(buf + offs, (size_t)osz, md, sess);
                     69:                have_md = 1;
1.4       deraadt    70:                if (memcmp(md, blks->blks[hint].chksum_long, blks->csum) == 0) {
1.15    ! benno      71:                        LOG4("%s: found matching hinted match: "
1.14      deraadt    72:                            "position %jd, block %zu (position %jd, size %zu)",
                     73:                            path,
                     74:                            (intmax_t)offs, blks->blks[hint].idx,
                     75:                            (intmax_t)blks->blks[hint].offs,
                     76:                            blks->blks[hint].len);
1.1       benno      77:                        return &blks->blks[hint];
                     78:                }
                     79:        }
                     80:
                     81:        /*
                     82:         * Now loop and look for the fast hash.
                     83:         * If it's found, move on to the slow hash.
                     84:         */
                     85:
                     86:        for (i = 0; i < blks->blksz; i++) {
                     87:                if (fhash != blks->blks[i].chksum_short)
                     88:                        continue;
                     89:                if ((size_t)osz != blks->blks[i].len)
                     90:                        continue;
                     91:
1.15    ! benno      92:                LOG4("%s: found matching fast match: "
1.14      deraadt    93:                    "position %jd, block %zu (position %jd, size %zu)",
                     94:                    path,
                     95:                    (intmax_t)offs, blks->blks[i].idx,
                     96:                    (intmax_t)blks->blks[i].offs,
                     97:                    blks->blks[i].len);
1.1       benno      98:
                     99:                /* Compute slow hash on demand. */
                    100:
1.4       deraadt   101:                if (have_md == 0) {
1.1       benno     102:                        hash_slow(buf + offs, (size_t)osz, md, sess);
                    103:                        have_md = 1;
                    104:                }
                    105:
                    106:                if (memcmp(md, blks->blks[i].chksum_long, blks->csum))
                    107:                        continue;
                    108:
1.15    ! benno     109:                LOG4("%s: sender verifies slow match", path);
1.1       benno     110:                return &blks->blks[i];
                    111:        }
                    112:
                    113:        return NULL;
                    114: }
                    115:
                    116: /*
                    117:  * Given a local file "path" and the blocks created by a remote machine,
                    118:  * find out which blocks of our file they don't have and send them.
1.7       florian   119:  * This function is reentrant: it must be called while there's still
                    120:  * data to send.
1.1       benno     121:  */
1.9       florian   122: void
                    123: blk_match(struct sess *sess, const struct blkset *blks,
1.7       florian   124:        const char *path, struct blkstat *st)
1.1       benno     125: {
1.9       florian   126:        off_t            last, end, sz;
                    127:        int32_t          tok;
                    128:        struct blk      *blk;
1.1       benno     129:
                    130:        /*
                    131:         * If the file's empty or we don't have any blocks from the
                    132:         * sender, then simply send the whole file.
                    133:         * Otherwise, run the hash matching routine and send raw chunks
                    134:         * and subsequent matching tokens.
                    135:         */
                    136:
1.7       florian   137:        if (st->mapsz && blks->blksz) {
1.9       florian   138:                /*
                    139:                 * Stop searching at the length of the file minus the
                    140:                 * size of the last block.
                    141:                 * The reason for this being that we don't need to do an
                    142:                 * incremental hash within the last block---if it
                    143:                 * doesn't match, it doesn't match.
                    144:                 */
                    145:
                    146:                end = st->mapsz + 1 - blks->blks[blks->blksz - 1].len;
                    147:                last = st->offs;
                    148:
                    149:                for ( ; st->offs < end; st->offs++) {
                    150:                        blk = blk_find(sess, st->map, st->mapsz,
                    151:                                st->offs, blks, path, st->hint);
                    152:                        if (blk == NULL)
                    153:                                continue;
                    154:
                    155:                        sz = st->offs - last;
                    156:                        st->dirty += sz;
                    157:                        st->total += sz;
1.15    ! benno     158:                        LOG4("%s: flushing %jd B before %zu B block %zu",
1.14      deraadt   159:                            path, (intmax_t)sz,
                    160:                            blk->len, blk->idx);
1.9       florian   161:                        tok = -(blk->idx + 1);
                    162:
                    163:                        /*
                    164:                         * Write the data we have, then follow it with
                    165:                         * the tag of the block that matches.
                    166:                         */
                    167:
                    168:                        st->curpos = last;
                    169:                        st->curlen = st->curpos + sz;
                    170:                        st->curtok = tok;
1.10      deraadt   171:                        assert(st->curtok != 0);
1.9       florian   172:                        st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
                    173:                        st->total += blk->len;
                    174:                        st->offs += blk->len;
                    175:                        st->hint = blk->idx + 1;
                    176:                        return;
1.1       benno     177:                }
                    178:
1.9       florian   179:                /* Emit remaining data and send terminator token. */
                    180:
                    181:                sz = st->mapsz - last;
1.15    ! benno     182:                LOG4("%s: flushing remaining %jd B",
1.14      deraadt   183:                    path, (intmax_t)sz);
1.1       benno     184:
1.9       florian   185:                st->total += sz;
                    186:                st->dirty += sz;
                    187:                st->curpos = last;
                    188:                st->curlen = st->curpos + sz;
                    189:                st->curtok = 0;
                    190:                st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
                    191:        } else {
                    192:                st->curpos = 0;
                    193:                st->curlen = st->mapsz;
                    194:                st->curtok = 0;
                    195:                st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK;
                    196:                st->dirty = st->total = st->mapsz;
1.1       benno     197:
1.15    ! benno     198:                LOG4("%s: flushing whole file %zu B",
1.14      deraadt   199:                    path, st->mapsz);
1.1       benno     200:        }
                    201: }
                    202:
                    203: /*
1.13      benno     204:  * Buffer the message from sender to the receiver indicating that the
                    205:  * block set has been received.
1.1       benno     206:  * Symmetrises blk_send_ack().
                    207:  */
1.13      benno     208: void
                    209: blk_recv_ack(struct sess *sess, char buf[20],
                    210:        const struct blkset *blocks, int32_t idx)
1.1       benno     211: {
1.8       florian   212:        size_t   pos = 0, sz;
1.1       benno     213:
1.8       florian   214:        sz = sizeof(int32_t) + /* index */
                    215:             sizeof(int32_t) + /* block count */
                    216:             sizeof(int32_t) + /* block length */
                    217:             sizeof(int32_t) + /* checksum length */
                    218:             sizeof(int32_t); /* block remainder */
1.13      benno     219:        assert(sz == 20);
1.8       florian   220:
                    221:        io_buffer_int(sess, buf, &pos, sz, idx);
                    222:        io_buffer_int(sess, buf, &pos, sz, blocks->blksz);
                    223:        io_buffer_int(sess, buf, &pos, sz, blocks->len);
                    224:        io_buffer_int(sess, buf, &pos, sz, blocks->csum);
                    225:        io_buffer_int(sess, buf, &pos, sz, blocks->rem);
                    226:        assert(pos == sz);
1.1       benno     227: }
                    228:
                    229: /*
                    230:  * Read all of the checksums for a file's blocks.
                    231:  * Returns the set of blocks or NULL on failure.
                    232:  */
                    233: struct blkset *
                    234: blk_recv(struct sess *sess, int fd, const char *path)
                    235: {
                    236:        struct blkset   *s;
                    237:        int32_t          i;
                    238:        size_t           j;
                    239:        struct blk      *b;
                    240:        off_t            offs = 0;
                    241:
1.4       deraadt   242:        if ((s = calloc(1, sizeof(struct blkset))) == NULL) {
1.15    ! benno     243:                ERR("calloc");
1.1       benno     244:                return NULL;
                    245:        }
                    246:
                    247:        /*
                    248:         * The block prologue consists of a few values that we'll need
                    249:         * in reading the individual blocks for this file.
                    250:         * FIXME: read into buffer and unbuffer.
                    251:         */
                    252:
1.3       deraadt   253:        if (!io_read_size(sess, fd, &s->blksz)) {
1.15    ! benno     254:                ERRX1("io_read_size");
1.1       benno     255:                goto out;
1.3       deraadt   256:        } else if (!io_read_size(sess, fd, &s->len)) {
1.15    ! benno     257:                ERRX1("io_read_size");
1.1       benno     258:                goto out;
1.3       deraadt   259:        } else if (!io_read_size(sess, fd, &s->csum)) {
1.15    ! benno     260:                ERRX1("io_read_int");
1.1       benno     261:                goto out;
1.3       deraadt   262:        } else if (!io_read_size(sess, fd, &s->rem)) {
1.15    ! benno     263:                ERRX1("io_read_int");
1.1       benno     264:                goto out;
                    265:        } else if (s->rem && s->rem >= s->len) {
1.15    ! benno     266:                ERRX("block remainder is "
1.1       benno     267:                        "greater than block size");
                    268:                goto out;
                    269:        }
                    270:
1.15    ! benno     271:        LOG3("%s: read block prologue: %zu blocks of "
1.14      deraadt   272:            "%zu B, %zu B remainder, %zu B checksum", path,
                    273:            s->blksz, s->len, s->rem, s->csum);
1.1       benno     274:
                    275:        if (s->blksz) {
                    276:                s->blks = calloc(s->blksz, sizeof(struct blk));
1.4       deraadt   277:                if (s->blks == NULL) {
1.15    ! benno     278:                        ERR("calloc");
1.1       benno     279:                        goto out;
                    280:                }
                    281:        }
                    282:
1.2       benno     283:        /*
1.1       benno     284:         * Read each block individually.
                    285:         * FIXME: read buffer and unbuffer.
                    286:         */
                    287:
                    288:        for (j = 0; j < s->blksz; j++) {
                    289:                b = &s->blks[j];
1.3       deraadt   290:                if (!io_read_int(sess, fd, &i)) {
1.15    ! benno     291:                        ERRX1("io_read_int");
1.1       benno     292:                        goto out;
                    293:                }
                    294:                b->chksum_short = i;
                    295:
                    296:                assert(s->csum <= sizeof(b->chksum_long));
1.3       deraadt   297:                if (!io_read_buf(sess,
1.1       benno     298:                    fd, b->chksum_long, s->csum)) {
1.15    ! benno     299:                        ERRX1("io_read_buf");
1.1       benno     300:                        goto out;
                    301:                }
                    302:
                    303:                /*
                    304:                 * If we're the last block, then we're assigned the
                    305:                 * remainder of the data.
                    306:                 */
                    307:
                    308:                b->offs = offs;
                    309:                b->idx = j;
                    310:                b->len = (j == (s->blksz - 1) && s->rem) ?
                    311:                        s->rem : s->len;
                    312:                offs += b->len;
                    313:
1.15    ! benno     314:                LOG4("%s: read block %zu, length %zu B",
1.14      deraadt   315:                    path, b->idx, b->len);
1.1       benno     316:        }
                    317:
                    318:        s->size = offs;
1.15    ! benno     319:        LOG3("%s: read blocks: %zu blocks, %jd B total blocked data",
1.14      deraadt   320:            path, s->blksz, (intmax_t)s->size);
1.1       benno     321:        return s;
                    322: out:
1.7       florian   323:        free(s->blks);
                    324:        free(s);
1.1       benno     325:        return NULL;
                    326: }
                    327:
                    328: /*
                    329:  * Symmetrise blk_recv_ack(), except w/o the leading identifier.
                    330:  * Return zero on failure, non-zero on success.
                    331:  */
                    332: int
                    333: blk_send_ack(struct sess *sess, int fd, struct blkset *p)
                    334: {
                    335:        char     buf[16];
                    336:        size_t   pos = 0, sz;
                    337:
                    338:        /* Put the entire send routine into a buffer. */
                    339:
                    340:        sz = sizeof(int32_t) + /* block count */
                    341:             sizeof(int32_t) + /* block length */
                    342:             sizeof(int32_t) + /* checksum length */
                    343:             sizeof(int32_t); /* block remainder */
                    344:        assert(sz <= sizeof(buf));
                    345:
1.3       deraadt   346:        if (!io_read_buf(sess, fd, buf, sz)) {
1.15    ! benno     347:                ERRX1("io_read_buf");
1.1       benno     348:                return 0;
                    349:        }
                    350:
1.3       deraadt   351:        if (!io_unbuffer_size(sess, buf, &pos, sz, &p->blksz))
1.15    ! benno     352:                ERRX1("io_unbuffer_size");
1.3       deraadt   353:        else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->len))
1.15    ! benno     354:                ERRX1("io_unbuffer_size");
1.3       deraadt   355:        else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->csum))
1.15    ! benno     356:                ERRX1("io_unbuffer_size");
1.3       deraadt   357:        else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->rem))
1.15    ! benno     358:                ERRX1("io_unbuffer_size");
1.1       benno     359:        else if (p->len && p->rem >= p->len)
1.15    ! benno     360:                ERRX1("non-zero length is less than remainder");
1.4       deraadt   361:        else if (p->csum == 0 || p->csum > 16)
1.15    ! benno     362:                ERRX1("inappropriate checksum length");
1.1       benno     363:        else
                    364:                return 1;
                    365:
                    366:        return 0;
                    367: }
                    368:
                    369: /*
                    370:  * Transmit the metadata for set and blocks.
                    371:  * Return zero on failure, non-zero on success.
                    372:  */
                    373: int
                    374: blk_send(struct sess *sess, int fd, size_t idx,
                    375:        const struct blkset *p, const char *path)
                    376: {
                    377:        char    *buf;
                    378:        size_t   i, pos = 0, sz;
                    379:        int      rc = 0;
                    380:
                    381:        /* Put the entire send routine into a buffer. */
                    382:
                    383:        sz = sizeof(int32_t) + /* identifier */
1.2       benno     384:            sizeof(int32_t) + /* block count */
                    385:            sizeof(int32_t) + /* block length */
                    386:            sizeof(int32_t) + /* checksum length */
                    387:            sizeof(int32_t) + /* block remainder */
                    388:            p->blksz *
                    389:            (sizeof(int32_t) + /* short checksum */
                    390:                p->csum); /* long checksum */
1.1       benno     391:
1.4       deraadt   392:        if ((buf = malloc(sz)) == NULL) {
1.15    ! benno     393:                ERR("malloc");
1.1       benno     394:                return 0;
                    395:        }
                    396:
                    397:        io_buffer_int(sess, buf, &pos, sz, idx);
                    398:        io_buffer_int(sess, buf, &pos, sz, p->blksz);
                    399:        io_buffer_int(sess, buf, &pos, sz, p->len);
                    400:        io_buffer_int(sess, buf, &pos, sz, p->csum);
                    401:        io_buffer_int(sess, buf, &pos, sz, p->rem);
                    402:
                    403:        for (i = 0; i < p->blksz; i++) {
1.2       benno     404:                io_buffer_int(sess, buf, &pos,
1.1       benno     405:                        sz, p->blks[i].chksum_short);
1.2       benno     406:                io_buffer_buf(sess, buf, &pos, sz,
1.1       benno     407:                        p->blks[i].chksum_long, p->csum);
                    408:        }
                    409:
                    410:        assert(pos == sz);
                    411:
1.3       deraadt   412:        if (!io_write_buf(sess, fd, buf, sz)) {
1.15    ! benno     413:                ERRX1("io_write_buf");
1.1       benno     414:                goto out;
                    415:        }
                    416:
1.15    ! benno     417:        LOG3("%s: sent block prologue: %zu blocks of %zu B, "
1.14      deraadt   418:            "%zu B remainder, %zu B checksum",
                    419:            path, p->blksz, p->len, p->rem, p->csum);
1.1       benno     420:        rc = 1;
                    421: out:
                    422:        free(buf);
                    423:        return rc;
                    424: }