Annotation of src/usr.bin/rsync/blocks.c, Revision 1.7
1.7 ! florian 1: /* $Id: blocks.c,v 1.6 2019/02/13 05:41:35 tb Exp $ */
1.1 benno 2: /*
3: * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18:
19: #include <assert.h>
20: #include <endian.h>
21: #include <errno.h>
22: #include <inttypes.h>
23: #include <stdio.h>
24: #include <stdlib.h>
25: #include <string.h>
26: #include <unistd.h>
27:
1.6 tb 28: #include <openssl/md4.h>
29:
1.1 benno 30: #include "extern.h"
31:
32: /*
33: * Flush out "size" bytes of the buffer, doing all of the appropriate
34: * chunking of the data, then the subsequent token (or zero).
35: * Return zero on failure, non-zero on success.
36: */
37: static int
38: blk_flush(struct sess *sess, int fd,
39: const void *b, off_t size, int32_t token)
40: {
41: off_t i = 0, sz;
42:
43: while (i < size) {
44: sz = MAX_CHUNK < (size - i) ?
45: MAX_CHUNK : (size - i);
1.3 deraadt 46: if (!io_write_int(sess, fd, sz)) {
1.1 benno 47: ERRX1(sess, "io_write_int");
48: return 0;
1.3 deraadt 49: } else if (!io_write_buf(sess, fd, b + i, sz)) {
1.1 benno 50: ERRX1(sess, "io_write_buf");
51: return 0;
52: }
53: i += sz;
54: }
55:
1.3 deraadt 56: if (!io_write_int(sess, fd, token)) {
1.1 benno 57: ERRX1(sess, "io_write_int");
58: return 0;
59: }
60:
61: return 1;
62: }
63:
64: /*
65: * From our current position of "offs" in buffer "buf" of total size
66: * "size", see if we can find a matching block in our list of blocks.
67: * The "hint" refers to the block that *might* work.
68: * Returns the blk or NULL if no matching block was found.
69: */
70: static struct blk *
71: blk_find(struct sess *sess, const void *buf, off_t size, off_t offs,
72: const struct blkset *blks, const char *path, size_t hint)
73: {
74: unsigned char md[MD4_DIGEST_LENGTH];
75: uint32_t fhash;
76: off_t remain, osz;
77: size_t i;
78: int have_md = 0;
79:
80: /*
81: * First, compute our fast hash.
82: * FIXME: yes, this can be a rolling computation, but I'm
83: * deliberately making it simple first.
84: */
85:
86: remain = size - offs;
87: assert(remain);
88: osz = remain < (off_t)blks->len ? remain : (off_t)blks->len;
89: fhash = hash_fast(buf + offs, (size_t)osz);
90: have_md = 0;
91:
92: /*
93: * Start with our match hint.
94: * This just runs the fast and slow check with the hint.
95: */
96:
97: if (hint < blks->blksz &&
98: fhash == blks->blks[hint].chksum_short &&
99: (size_t)osz == blks->blks[hint].len) {
100: hash_slow(buf + offs, (size_t)osz, md, sess);
101: have_md = 1;
1.4 deraadt 102: if (memcmp(md, blks->blks[hint].chksum_long, blks->csum) == 0) {
1.1 benno 103: LOG4(sess, "%s: found matching hinted match: "
104: "position %jd, block %zu "
105: "(position %jd, size %zu)", path,
106: (intmax_t)offs, blks->blks[hint].idx,
107: (intmax_t)blks->blks[hint].offs,
108: blks->blks[hint].len);
109: return &blks->blks[hint];
110: }
111: }
112:
113: /*
114: * Now loop and look for the fast hash.
115: * If it's found, move on to the slow hash.
116: */
117:
118: for (i = 0; i < blks->blksz; i++) {
119: if (fhash != blks->blks[i].chksum_short)
120: continue;
121: if ((size_t)osz != blks->blks[i].len)
122: continue;
123:
124: LOG4(sess, "%s: found matching fast match: "
125: "position %jd, block %zu "
126: "(position %jd, size %zu)", path,
127: (intmax_t)offs, blks->blks[i].idx,
128: (intmax_t)blks->blks[i].offs,
129: blks->blks[i].len);
130:
131: /* Compute slow hash on demand. */
132:
1.4 deraadt 133: if (have_md == 0) {
1.1 benno 134: hash_slow(buf + offs, (size_t)osz, md, sess);
135: have_md = 1;
136: }
137:
138: if (memcmp(md, blks->blks[i].chksum_long, blks->csum))
139: continue;
140:
141: LOG4(sess, "%s: sender verifies slow match", path);
142: return &blks->blks[i];
143: }
144:
145: return NULL;
146: }
147:
148: /*
149: * The main reconstruction algorithm on the sender side.
1.7 ! florian 150: * This is reentrant: it's meant to be called whenever "fd" unblocks for
! 151: * writing by the sender.
1.1 benno 152: * Scans byte-wise over the input file, looking for matching blocks in
153: * what the server sent us.
154: * If a block is found, emit all data up until the block, then the token
155: * for the block.
156: * The receiving end can then reconstruct the file trivially.
157: * Return zero on failure, non-zero on success.
158: */
159: static int
160: blk_match_send(struct sess *sess, const char *path, int fd,
1.7 ! florian 161: const struct blkset *blks, struct blkstat *st)
1.1 benno 162: {
1.7 ! florian 163: off_t last, end, sz;
1.1 benno 164: int32_t tok;
165: struct blk *blk;
166:
167: /*
168: * Stop searching at the length of the file minus the size of
169: * the last block.
170: * The reason for this being that we don't need to do an
171: * incremental hash within the last block---if it doesn't match,
172: * it doesn't match.
173: */
174:
1.7 ! florian 175: end = st->mapsz + 1 - blks->blks[blks->blksz - 1].len;
! 176: last = st->offs;
1.1 benno 177:
1.7 ! florian 178: for ( ; st->offs < end; st->offs++) {
! 179: blk = blk_find(sess, st->map, st->mapsz,
! 180: st->offs, blks, path, st->hint);
1.4 deraadt 181: if (blk == NULL)
1.1 benno 182: continue;
183:
1.7 ! florian 184: sz = st->offs - last;
! 185: st->dirty += sz;
! 186: st->total += sz;
1.1 benno 187: LOG4(sess, "%s: flushing %jd B before %zu B "
1.7 ! florian 188: "block %zu", path, (intmax_t)sz, blk->len, blk->idx);
1.1 benno 189: tok = -(blk->idx + 1);
190:
191: /*
192: * Write the data we have, then follow it with the tag
193: * of the block that matches.
194: * The receiver will then write our data, then the data
195: * it already has in the matching block.
196: */
197:
1.7 ! florian 198: if (!blk_flush(sess, fd, st->map + last, sz, tok)) {
1.1 benno 199: ERRX1(sess, "blk_flush");
1.7 ! florian 200: return -1;
1.1 benno 201: }
202:
1.7 ! florian 203: st->total += blk->len;
! 204: st->offs += blk->len;
! 205: st->hint = blk->idx + 1;
! 206: return 0;
1.1 benno 207: }
208:
209: /* Emit remaining data and send terminator token. */
210:
1.7 ! florian 211: sz = st->mapsz - last;
! 212: st->total += sz;
! 213: st->dirty += sz;
1.1 benno 214:
215: LOG4(sess, "%s: flushing remaining %jd B", path, (intmax_t)sz);
216:
1.7 ! florian 217: if (!blk_flush(sess, fd, st->map + last, sz, 0)) {
1.1 benno 218: ERRX1(sess, "blk_flush");
1.7 ! florian 219: return -1;
1.1 benno 220: }
221:
222: LOG3(sess, "%s: flushed (chunked) %jd B total, "
1.7 ! florian 223: "%.2f%% upload ratio", path, (intmax_t)st->total,
! 224: 100.0 * st->dirty / st->total);
1.1 benno 225: return 1;
226: }
227:
228: /*
229: * Given a local file "path" and the blocks created by a remote machine,
230: * find out which blocks of our file they don't have and send them.
1.7 ! florian 231: * This function is reentrant: it must be called while there's still
! 232: * data to send.
! 233: * Return 0 if there's more data to send, >0 if the file has completed
! 234: * its update, or <0 on error.
1.1 benno 235: */
236: int
1.7 ! florian 237: blk_match(struct sess *sess, int fd, const struct blkset *blks,
! 238: const char *path, struct blkstat *st)
1.1 benno 239: {
240: unsigned char filemd[MD4_DIGEST_LENGTH];
1.7 ! florian 241: int c;
1.1 benno 242:
243: /*
244: * If the file's empty or we don't have any blocks from the
245: * sender, then simply send the whole file.
246: * Otherwise, run the hash matching routine and send raw chunks
247: * and subsequent matching tokens.
248: */
249:
1.7 ! florian 250: if (st->mapsz && blks->blksz) {
! 251: if ((c = blk_match_send(sess, path, fd, blks, st)) < 0) {
1.1 benno 252: ERRX1(sess, "blk_match_send");
1.7 ! florian 253: return -1;
! 254: } else if (c == 0)
! 255: return 0;
1.1 benno 256: } else {
1.7 ! florian 257: if (!blk_flush(sess, fd, st->map, st->mapsz, 0)) {
1.1 benno 258: ERRX1(sess, "blk_flush");
1.7 ! florian 259: return -1;
1.1 benno 260: }
1.5 benno 261: LOG3(sess, "%s: flushed (un-chunked) %jd B, 100%% upload ratio",
1.7 ! florian 262: path, (intmax_t)st->mapsz);
1.1 benno 263: }
264:
1.2 benno 265: /*
1.1 benno 266: * Now write the full file hash.
267: * Since we're seeding the hash, this always gives us some sort
268: * of data even if the file's zero-length.
269: */
270:
1.7 ! florian 271: hash_file(st->map, st->mapsz, filemd, sess);
1.1 benno 272:
1.3 deraadt 273: if (!io_write_buf(sess, fd, filemd, MD4_DIGEST_LENGTH)) {
1.1 benno 274: ERRX1(sess, "io_write_buf");
1.7 ! florian 275: return -1;
1.1 benno 276: }
277:
1.7 ! florian 278: return 1;
1.1 benno 279: }
280:
281: /*
282: * Sent from the sender to the receiver to indicate that the block set
283: * has been received.
284: * Symmetrises blk_send_ack().
285: * Returns zero on failure, non-zero on success.
286: */
287: int
288: blk_recv_ack(struct sess *sess,
289: int fd, const struct blkset *blocks, int32_t idx)
290: {
291:
292: /* FIXME: put into static block. */
293:
1.3 deraadt 294: if (!io_write_int(sess, fd, idx))
1.1 benno 295: ERRX1(sess, "io_write_int");
1.3 deraadt 296: else if (!io_write_int(sess, fd, blocks->blksz))
1.1 benno 297: ERRX1(sess, "io_write_int");
1.3 deraadt 298: else if (!io_write_int(sess, fd, blocks->len))
1.1 benno 299: ERRX1(sess, "io_write_int");
1.3 deraadt 300: else if (!io_write_int(sess, fd, blocks->csum))
1.1 benno 301: ERRX1(sess, "io_write_int");
1.3 deraadt 302: else if (!io_write_int(sess, fd, blocks->rem))
1.1 benno 303: ERRX1(sess, "io_write_int");
304: else
305: return 1;
306:
307: return 0;
308: }
309:
310: /*
311: * Read all of the checksums for a file's blocks.
312: * Returns the set of blocks or NULL on failure.
313: */
314: struct blkset *
315: blk_recv(struct sess *sess, int fd, const char *path)
316: {
317: struct blkset *s;
318: int32_t i;
319: size_t j;
320: struct blk *b;
321: off_t offs = 0;
322:
1.4 deraadt 323: if ((s = calloc(1, sizeof(struct blkset))) == NULL) {
1.1 benno 324: ERR(sess, "calloc");
325: return NULL;
326: }
327:
328: /*
329: * The block prologue consists of a few values that we'll need
330: * in reading the individual blocks for this file.
331: * FIXME: read into buffer and unbuffer.
332: */
333:
1.3 deraadt 334: if (!io_read_size(sess, fd, &s->blksz)) {
1.1 benno 335: ERRX1(sess, "io_read_size");
336: goto out;
1.3 deraadt 337: } else if (!io_read_size(sess, fd, &s->len)) {
1.1 benno 338: ERRX1(sess, "io_read_size");
339: goto out;
1.3 deraadt 340: } else if (!io_read_size(sess, fd, &s->csum)) {
1.1 benno 341: ERRX1(sess, "io_read_int");
342: goto out;
1.3 deraadt 343: } else if (!io_read_size(sess, fd, &s->rem)) {
1.1 benno 344: ERRX1(sess, "io_read_int");
345: goto out;
346: } else if (s->rem && s->rem >= s->len) {
347: ERRX(sess, "block remainder is "
348: "greater than block size");
349: goto out;
350: }
351:
352: LOG3(sess, "%s: read block prologue: %zu blocks of "
353: "%zu B, %zu B remainder, %zu B checksum", path,
354: s->blksz, s->len, s->rem, s->csum);
355:
356: if (s->blksz) {
357: s->blks = calloc(s->blksz, sizeof(struct blk));
1.4 deraadt 358: if (s->blks == NULL) {
1.1 benno 359: ERR(sess, "calloc");
360: goto out;
361: }
362: }
363:
1.2 benno 364: /*
1.1 benno 365: * Read each block individually.
366: * FIXME: read buffer and unbuffer.
367: */
368:
369: for (j = 0; j < s->blksz; j++) {
370: b = &s->blks[j];
1.3 deraadt 371: if (!io_read_int(sess, fd, &i)) {
1.1 benno 372: ERRX1(sess, "io_read_int");
373: goto out;
374: }
375: b->chksum_short = i;
376:
377: assert(s->csum <= sizeof(b->chksum_long));
1.3 deraadt 378: if (!io_read_buf(sess,
1.1 benno 379: fd, b->chksum_long, s->csum)) {
380: ERRX1(sess, "io_read_buf");
381: goto out;
382: }
383:
384: /*
385: * If we're the last block, then we're assigned the
386: * remainder of the data.
387: */
388:
389: b->offs = offs;
390: b->idx = j;
391: b->len = (j == (s->blksz - 1) && s->rem) ?
392: s->rem : s->len;
393: offs += b->len;
394:
395: LOG4(sess, "%s: read block %zu, "
396: "length %zu B", path, b->idx, b->len);
397: }
398:
399: s->size = offs;
400: LOG3(sess, "%s: read blocks: %zu blocks, %jd B total "
401: "blocked data", path, s->blksz, (intmax_t)s->size);
402: return s;
403: out:
1.7 ! florian 404: free(s->blks);
! 405: free(s);
1.1 benno 406: return NULL;
407: }
408:
409: /*
410: * Symmetrise blk_recv_ack(), except w/o the leading identifier.
411: * Return zero on failure, non-zero on success.
412: */
413: int
414: blk_send_ack(struct sess *sess, int fd, struct blkset *p)
415: {
416: char buf[16];
417: size_t pos = 0, sz;
418:
419: /* Put the entire send routine into a buffer. */
420:
421: sz = sizeof(int32_t) + /* block count */
422: sizeof(int32_t) + /* block length */
423: sizeof(int32_t) + /* checksum length */
424: sizeof(int32_t); /* block remainder */
425: assert(sz <= sizeof(buf));
426:
1.3 deraadt 427: if (!io_read_buf(sess, fd, buf, sz)) {
1.1 benno 428: ERRX1(sess, "io_read_buf");
429: return 0;
430: }
431:
1.3 deraadt 432: if (!io_unbuffer_size(sess, buf, &pos, sz, &p->blksz))
1.1 benno 433: ERRX1(sess, "io_unbuffer_size");
1.3 deraadt 434: else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->len))
1.1 benno 435: ERRX1(sess, "io_unbuffer_size");
1.3 deraadt 436: else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->csum))
1.1 benno 437: ERRX1(sess, "io_unbuffer_size");
1.3 deraadt 438: else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->rem))
1.1 benno 439: ERRX1(sess, "io_unbuffer_size");
440: else if (p->len && p->rem >= p->len)
441: ERRX1(sess, "non-zero length is less than remainder");
1.4 deraadt 442: else if (p->csum == 0 || p->csum > 16)
1.1 benno 443: ERRX1(sess, "inappropriate checksum length");
444: else
445: return 1;
446:
447: return 0;
448: }
449:
450: /*
451: * Transmit the metadata for set and blocks.
452: * Return zero on failure, non-zero on success.
453: */
454: int
455: blk_send(struct sess *sess, int fd, size_t idx,
456: const struct blkset *p, const char *path)
457: {
458: char *buf;
459: size_t i, pos = 0, sz;
460: int rc = 0;
461:
462: /* Put the entire send routine into a buffer. */
463:
464: sz = sizeof(int32_t) + /* identifier */
1.2 benno 465: sizeof(int32_t) + /* block count */
466: sizeof(int32_t) + /* block length */
467: sizeof(int32_t) + /* checksum length */
468: sizeof(int32_t) + /* block remainder */
469: p->blksz *
470: (sizeof(int32_t) + /* short checksum */
471: p->csum); /* long checksum */
1.1 benno 472:
1.4 deraadt 473: if ((buf = malloc(sz)) == NULL) {
1.1 benno 474: ERR(sess, "malloc");
475: return 0;
476: }
477:
478: io_buffer_int(sess, buf, &pos, sz, idx);
479: io_buffer_int(sess, buf, &pos, sz, p->blksz);
480: io_buffer_int(sess, buf, &pos, sz, p->len);
481: io_buffer_int(sess, buf, &pos, sz, p->csum);
482: io_buffer_int(sess, buf, &pos, sz, p->rem);
483:
484: for (i = 0; i < p->blksz; i++) {
1.2 benno 485: io_buffer_int(sess, buf, &pos,
1.1 benno 486: sz, p->blks[i].chksum_short);
1.2 benno 487: io_buffer_buf(sess, buf, &pos, sz,
1.1 benno 488: p->blks[i].chksum_long, p->csum);
489: }
490:
491: assert(pos == sz);
492:
1.3 deraadt 493: if (!io_write_buf(sess, fd, buf, sz)) {
1.1 benno 494: ERRX1(sess, "io_write_buf");
495: goto out;
496: }
497:
498: LOG3(sess, "%s: sent block prologue: %zu blocks of %zu B, "
499: "%zu B remainder, %zu B checksum", path,
500: p->blksz, p->len, p->rem, p->csum);
501: rc = 1;
502: out:
503: free(buf);
504: return rc;
505: }