Annotation of src/usr.bin/rsync/blocks.c, Revision 1.9
1.9 ! florian 1: /* $Id: blocks.c,v 1.8 2019/02/16 16:57:48 florian Exp $ */
1.1 benno 2: /*
3: * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18:
19: #include <assert.h>
20: #include <endian.h>
21: #include <errno.h>
22: #include <inttypes.h>
23: #include <stdio.h>
24: #include <stdlib.h>
25: #include <string.h>
26: #include <unistd.h>
27:
1.6 tb 28: #include <openssl/md4.h>
29:
1.1 benno 30: #include "extern.h"
31:
32: /*
33: * From our current position of "offs" in buffer "buf" of total size
34: * "size", see if we can find a matching block in our list of blocks.
35: * The "hint" refers to the block that *might* work.
36: * Returns the blk or NULL if no matching block was found.
37: */
38: static struct blk *
39: blk_find(struct sess *sess, const void *buf, off_t size, off_t offs,
40: const struct blkset *blks, const char *path, size_t hint)
41: {
42: unsigned char md[MD4_DIGEST_LENGTH];
43: uint32_t fhash;
44: off_t remain, osz;
45: size_t i;
46: int have_md = 0;
47:
48: /*
49: * First, compute our fast hash.
50: * FIXME: yes, this can be a rolling computation, but I'm
51: * deliberately making it simple first.
52: */
53:
54: remain = size - offs;
55: assert(remain);
56: osz = remain < (off_t)blks->len ? remain : (off_t)blks->len;
57: fhash = hash_fast(buf + offs, (size_t)osz);
58: have_md = 0;
59:
60: /*
61: * Start with our match hint.
62: * This just runs the fast and slow check with the hint.
63: */
64:
65: if (hint < blks->blksz &&
66: fhash == blks->blks[hint].chksum_short &&
67: (size_t)osz == blks->blks[hint].len) {
68: hash_slow(buf + offs, (size_t)osz, md, sess);
69: have_md = 1;
1.4 deraadt 70: if (memcmp(md, blks->blks[hint].chksum_long, blks->csum) == 0) {
1.1 benno 71: LOG4(sess, "%s: found matching hinted match: "
72: "position %jd, block %zu "
73: "(position %jd, size %zu)", path,
74: (intmax_t)offs, blks->blks[hint].idx,
75: (intmax_t)blks->blks[hint].offs,
76: blks->blks[hint].len);
77: return &blks->blks[hint];
78: }
79: }
80:
81: /*
82: * Now loop and look for the fast hash.
83: * If it's found, move on to the slow hash.
84: */
85:
86: for (i = 0; i < blks->blksz; i++) {
87: if (fhash != blks->blks[i].chksum_short)
88: continue;
89: if ((size_t)osz != blks->blks[i].len)
90: continue;
91:
92: LOG4(sess, "%s: found matching fast match: "
93: "position %jd, block %zu "
94: "(position %jd, size %zu)", path,
95: (intmax_t)offs, blks->blks[i].idx,
96: (intmax_t)blks->blks[i].offs,
97: blks->blks[i].len);
98:
99: /* Compute slow hash on demand. */
100:
1.4 deraadt 101: if (have_md == 0) {
1.1 benno 102: hash_slow(buf + offs, (size_t)osz, md, sess);
103: have_md = 1;
104: }
105:
106: if (memcmp(md, blks->blks[i].chksum_long, blks->csum))
107: continue;
108:
109: LOG4(sess, "%s: sender verifies slow match", path);
110: return &blks->blks[i];
111: }
112:
113: return NULL;
114: }
115:
116: /*
117: * Given a local file "path" and the blocks created by a remote machine,
118: * find out which blocks of our file they don't have and send them.
1.7 florian 119: * This function is reentrant: it must be called while there's still
120: * data to send.
1.1 benno 121: */
1.9 ! florian 122: void
! 123: blk_match(struct sess *sess, const struct blkset *blks,
1.7 florian 124: const char *path, struct blkstat *st)
1.1 benno 125: {
1.9 ! florian 126: off_t last, end, sz;
! 127: int32_t tok;
! 128: struct blk *blk;
1.1 benno 129:
130: /*
131: * If the file's empty or we don't have any blocks from the
132: * sender, then simply send the whole file.
133: * Otherwise, run the hash matching routine and send raw chunks
134: * and subsequent matching tokens.
135: */
136:
1.7 florian 137: if (st->mapsz && blks->blksz) {
1.9 ! florian 138: /*
! 139: * Stop searching at the length of the file minus the
! 140: * size of the last block.
! 141: * The reason for this being that we don't need to do an
! 142: * incremental hash within the last block---if it
! 143: * doesn't match, it doesn't match.
! 144: */
! 145:
! 146: end = st->mapsz + 1 - blks->blks[blks->blksz - 1].len;
! 147: last = st->offs;
! 148:
! 149: for ( ; st->offs < end; st->offs++) {
! 150: blk = blk_find(sess, st->map, st->mapsz,
! 151: st->offs, blks, path, st->hint);
! 152: if (blk == NULL)
! 153: continue;
! 154:
! 155: sz = st->offs - last;
! 156: st->dirty += sz;
! 157: st->total += sz;
! 158: LOG4(sess, "%s: flushing %jd B before %zu B "
! 159: "block %zu", path, (intmax_t)sz,
! 160: blk->len, blk->idx);
! 161: tok = -(blk->idx + 1);
! 162:
! 163: /*
! 164: * Write the data we have, then follow it with
! 165: * the tag of the block that matches.
! 166: */
! 167:
! 168: st->curpos = last;
! 169: st->curlen = st->curpos + sz;
! 170: st->curtok = tok;
! 171: assert(0 != st->curtok);
! 172: st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
! 173: st->total += blk->len;
! 174: st->offs += blk->len;
! 175: st->hint = blk->idx + 1;
! 176: return;
1.1 benno 177: }
178:
1.9 ! florian 179: /* Emit remaining data and send terminator token. */
! 180:
! 181: sz = st->mapsz - last;
! 182: LOG4(sess, "%s: flushing remaining %jd B",
! 183: path, (intmax_t)sz);
1.1 benno 184:
1.9 ! florian 185: st->total += sz;
! 186: st->dirty += sz;
! 187: st->curpos = last;
! 188: st->curlen = st->curpos + sz;
! 189: st->curtok = 0;
! 190: st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
! 191: } else {
! 192: st->curpos = 0;
! 193: st->curlen = st->mapsz;
! 194: st->curtok = 0;
! 195: st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK;
! 196: st->dirty = st->total = st->mapsz;
1.1 benno 197:
1.9 ! florian 198: LOG4(sess, "%s: flushing whole file %zu B",
! 199: path, st->mapsz);
1.1 benno 200: }
201: }
202:
203: /*
204: * Sent from the sender to the receiver to indicate that the block set
205: * has been received.
206: * Symmetrises blk_send_ack().
207: * Returns zero on failure, non-zero on success.
208: */
209: int
210: blk_recv_ack(struct sess *sess,
211: int fd, const struct blkset *blocks, int32_t idx)
212: {
1.8 florian 213: char buf[20];
214: size_t pos = 0, sz;
1.1 benno 215:
1.8 florian 216: sz = sizeof(int32_t) + /* index */
217: sizeof(int32_t) + /* block count */
218: sizeof(int32_t) + /* block length */
219: sizeof(int32_t) + /* checksum length */
220: sizeof(int32_t); /* block remainder */
221: assert(sz <= sizeof(buf));
222:
223: io_buffer_int(sess, buf, &pos, sz, idx);
224: io_buffer_int(sess, buf, &pos, sz, blocks->blksz);
225: io_buffer_int(sess, buf, &pos, sz, blocks->len);
226: io_buffer_int(sess, buf, &pos, sz, blocks->csum);
227: io_buffer_int(sess, buf, &pos, sz, blocks->rem);
228: assert(pos == sz);
1.1 benno 229:
1.8 florian 230: if (!io_write_buf(sess, fd, buf, sz)) {
231: ERRX1(sess, "io_write_buf");
232: return 0;
233: }
1.1 benno 234:
1.8 florian 235: return 1;
1.1 benno 236: }
237:
238: /*
239: * Read all of the checksums for a file's blocks.
240: * Returns the set of blocks or NULL on failure.
241: */
242: struct blkset *
243: blk_recv(struct sess *sess, int fd, const char *path)
244: {
245: struct blkset *s;
246: int32_t i;
247: size_t j;
248: struct blk *b;
249: off_t offs = 0;
250:
1.4 deraadt 251: if ((s = calloc(1, sizeof(struct blkset))) == NULL) {
1.1 benno 252: ERR(sess, "calloc");
253: return NULL;
254: }
255:
256: /*
257: * The block prologue consists of a few values that we'll need
258: * in reading the individual blocks for this file.
259: * FIXME: read into buffer and unbuffer.
260: */
261:
1.3 deraadt 262: if (!io_read_size(sess, fd, &s->blksz)) {
1.1 benno 263: ERRX1(sess, "io_read_size");
264: goto out;
1.3 deraadt 265: } else if (!io_read_size(sess, fd, &s->len)) {
1.1 benno 266: ERRX1(sess, "io_read_size");
267: goto out;
1.3 deraadt 268: } else if (!io_read_size(sess, fd, &s->csum)) {
1.1 benno 269: ERRX1(sess, "io_read_int");
270: goto out;
1.3 deraadt 271: } else if (!io_read_size(sess, fd, &s->rem)) {
1.1 benno 272: ERRX1(sess, "io_read_int");
273: goto out;
274: } else if (s->rem && s->rem >= s->len) {
275: ERRX(sess, "block remainder is "
276: "greater than block size");
277: goto out;
278: }
279:
280: LOG3(sess, "%s: read block prologue: %zu blocks of "
281: "%zu B, %zu B remainder, %zu B checksum", path,
282: s->blksz, s->len, s->rem, s->csum);
283:
284: if (s->blksz) {
285: s->blks = calloc(s->blksz, sizeof(struct blk));
1.4 deraadt 286: if (s->blks == NULL) {
1.1 benno 287: ERR(sess, "calloc");
288: goto out;
289: }
290: }
291:
1.2 benno 292: /*
1.1 benno 293: * Read each block individually.
294: * FIXME: read buffer and unbuffer.
295: */
296:
297: for (j = 0; j < s->blksz; j++) {
298: b = &s->blks[j];
1.3 deraadt 299: if (!io_read_int(sess, fd, &i)) {
1.1 benno 300: ERRX1(sess, "io_read_int");
301: goto out;
302: }
303: b->chksum_short = i;
304:
305: assert(s->csum <= sizeof(b->chksum_long));
1.3 deraadt 306: if (!io_read_buf(sess,
1.1 benno 307: fd, b->chksum_long, s->csum)) {
308: ERRX1(sess, "io_read_buf");
309: goto out;
310: }
311:
312: /*
313: * If we're the last block, then we're assigned the
314: * remainder of the data.
315: */
316:
317: b->offs = offs;
318: b->idx = j;
319: b->len = (j == (s->blksz - 1) && s->rem) ?
320: s->rem : s->len;
321: offs += b->len;
322:
323: LOG4(sess, "%s: read block %zu, "
324: "length %zu B", path, b->idx, b->len);
325: }
326:
327: s->size = offs;
328: LOG3(sess, "%s: read blocks: %zu blocks, %jd B total "
329: "blocked data", path, s->blksz, (intmax_t)s->size);
330: return s;
331: out:
1.7 florian 332: free(s->blks);
333: free(s);
1.1 benno 334: return NULL;
335: }
336:
337: /*
338: * Symmetrise blk_recv_ack(), except w/o the leading identifier.
339: * Return zero on failure, non-zero on success.
340: */
341: int
342: blk_send_ack(struct sess *sess, int fd, struct blkset *p)
343: {
344: char buf[16];
345: size_t pos = 0, sz;
346:
347: /* Put the entire send routine into a buffer. */
348:
349: sz = sizeof(int32_t) + /* block count */
350: sizeof(int32_t) + /* block length */
351: sizeof(int32_t) + /* checksum length */
352: sizeof(int32_t); /* block remainder */
353: assert(sz <= sizeof(buf));
354:
1.3 deraadt 355: if (!io_read_buf(sess, fd, buf, sz)) {
1.1 benno 356: ERRX1(sess, "io_read_buf");
357: return 0;
358: }
359:
1.3 deraadt 360: if (!io_unbuffer_size(sess, buf, &pos, sz, &p->blksz))
1.1 benno 361: ERRX1(sess, "io_unbuffer_size");
1.3 deraadt 362: else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->len))
1.1 benno 363: ERRX1(sess, "io_unbuffer_size");
1.3 deraadt 364: else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->csum))
1.1 benno 365: ERRX1(sess, "io_unbuffer_size");
1.3 deraadt 366: else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->rem))
1.1 benno 367: ERRX1(sess, "io_unbuffer_size");
368: else if (p->len && p->rem >= p->len)
369: ERRX1(sess, "non-zero length is less than remainder");
1.4 deraadt 370: else if (p->csum == 0 || p->csum > 16)
1.1 benno 371: ERRX1(sess, "inappropriate checksum length");
372: else
373: return 1;
374:
375: return 0;
376: }
377:
378: /*
379: * Transmit the metadata for set and blocks.
380: * Return zero on failure, non-zero on success.
381: */
382: int
383: blk_send(struct sess *sess, int fd, size_t idx,
384: const struct blkset *p, const char *path)
385: {
386: char *buf;
387: size_t i, pos = 0, sz;
388: int rc = 0;
389:
390: /* Put the entire send routine into a buffer. */
391:
392: sz = sizeof(int32_t) + /* identifier */
1.2 benno 393: sizeof(int32_t) + /* block count */
394: sizeof(int32_t) + /* block length */
395: sizeof(int32_t) + /* checksum length */
396: sizeof(int32_t) + /* block remainder */
397: p->blksz *
398: (sizeof(int32_t) + /* short checksum */
399: p->csum); /* long checksum */
1.1 benno 400:
1.4 deraadt 401: if ((buf = malloc(sz)) == NULL) {
1.1 benno 402: ERR(sess, "malloc");
403: return 0;
404: }
405:
406: io_buffer_int(sess, buf, &pos, sz, idx);
407: io_buffer_int(sess, buf, &pos, sz, p->blksz);
408: io_buffer_int(sess, buf, &pos, sz, p->len);
409: io_buffer_int(sess, buf, &pos, sz, p->csum);
410: io_buffer_int(sess, buf, &pos, sz, p->rem);
411:
412: for (i = 0; i < p->blksz; i++) {
1.2 benno 413: io_buffer_int(sess, buf, &pos,
1.1 benno 414: sz, p->blks[i].chksum_short);
1.2 benno 415: io_buffer_buf(sess, buf, &pos, sz,
1.1 benno 416: p->blks[i].chksum_long, p->csum);
417: }
418:
419: assert(pos == sz);
420:
1.3 deraadt 421: if (!io_write_buf(sess, fd, buf, sz)) {
1.1 benno 422: ERRX1(sess, "io_write_buf");
423: goto out;
424: }
425:
426: LOG3(sess, "%s: sent block prologue: %zu blocks of %zu B, "
427: "%zu B remainder, %zu B checksum", path,
428: p->blksz, p->len, p->rem, p->csum);
429: rc = 1;
430: out:
431: free(buf);
432: return rc;
433: }