Annotation of src/usr.bin/rsync/blocks.c, Revision 1.17
1.17 ! deraadt 1: /* $Id: blocks.c,v 1.16 2019/05/08 21:30:11 benno Exp $ */
1.1 benno 2: /*
3: * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18:
19: #include <assert.h>
20: #include <endian.h>
21: #include <errno.h>
22: #include <inttypes.h>
23: #include <stdio.h>
24: #include <stdlib.h>
25: #include <string.h>
26: #include <unistd.h>
27:
1.6 tb 28: #include <openssl/md4.h>
29:
1.1 benno 30: #include "extern.h"
31:
32: /*
33: * From our current position of "offs" in buffer "buf" of total size
34: * "size", see if we can find a matching block in our list of blocks.
35: * The "hint" refers to the block that *might* work.
36: * Returns the blk or NULL if no matching block was found.
37: */
38: static struct blk *
39: blk_find(struct sess *sess, const void *buf, off_t size, off_t offs,
40: const struct blkset *blks, const char *path, size_t hint)
41: {
42: unsigned char md[MD4_DIGEST_LENGTH];
43: uint32_t fhash;
44: off_t remain, osz;
45: size_t i;
46: int have_md = 0;
47:
48: /*
49: * First, compute our fast hash.
50: * FIXME: yes, this can be a rolling computation, but I'm
51: * deliberately making it simple first.
52: */
53:
54: remain = size - offs;
55: assert(remain);
56: osz = remain < (off_t)blks->len ? remain : (off_t)blks->len;
57: fhash = hash_fast(buf + offs, (size_t)osz);
58:
59: /*
60: * Start with our match hint.
61: * This just runs the fast and slow check with the hint.
62: */
63:
64: if (hint < blks->blksz &&
65: fhash == blks->blks[hint].chksum_short &&
66: (size_t)osz == blks->blks[hint].len) {
67: hash_slow(buf + offs, (size_t)osz, md, sess);
68: have_md = 1;
1.4 deraadt 69: if (memcmp(md, blks->blks[hint].chksum_long, blks->csum) == 0) {
1.15 benno 70: LOG4("%s: found matching hinted match: "
1.14 deraadt 71: "position %jd, block %zu (position %jd, size %zu)",
72: path,
73: (intmax_t)offs, blks->blks[hint].idx,
74: (intmax_t)blks->blks[hint].offs,
75: blks->blks[hint].len);
1.1 benno 76: return &blks->blks[hint];
77: }
78: }
79:
80: /*
81: * Now loop and look for the fast hash.
82: * If it's found, move on to the slow hash.
83: */
84:
85: for (i = 0; i < blks->blksz; i++) {
86: if (fhash != blks->blks[i].chksum_short)
87: continue;
88: if ((size_t)osz != blks->blks[i].len)
89: continue;
90:
1.15 benno 91: LOG4("%s: found matching fast match: "
1.14 deraadt 92: "position %jd, block %zu (position %jd, size %zu)",
93: path,
94: (intmax_t)offs, blks->blks[i].idx,
95: (intmax_t)blks->blks[i].offs,
96: blks->blks[i].len);
1.1 benno 97:
98: /* Compute slow hash on demand. */
99:
1.4 deraadt 100: if (have_md == 0) {
1.1 benno 101: hash_slow(buf + offs, (size_t)osz, md, sess);
102: have_md = 1;
103: }
104:
105: if (memcmp(md, blks->blks[i].chksum_long, blks->csum))
106: continue;
107:
1.15 benno 108: LOG4("%s: sender verifies slow match", path);
1.1 benno 109: return &blks->blks[i];
110: }
111:
112: return NULL;
113: }
114:
115: /*
116: * Given a local file "path" and the blocks created by a remote machine,
117: * find out which blocks of our file they don't have and send them.
1.7 florian 118: * This function is reentrant: it must be called while there's still
119: * data to send.
1.1 benno 120: */
1.9 florian 121: void
122: blk_match(struct sess *sess, const struct blkset *blks,
1.7 florian 123: const char *path, struct blkstat *st)
1.1 benno 124: {
1.9 florian 125: off_t last, end, sz;
126: int32_t tok;
127: struct blk *blk;
1.1 benno 128:
129: /*
130: * If the file's empty or we don't have any blocks from the
131: * sender, then simply send the whole file.
132: * Otherwise, run the hash matching routine and send raw chunks
133: * and subsequent matching tokens.
134: */
135:
1.7 florian 136: if (st->mapsz && blks->blksz) {
1.9 florian 137: /*
138: * Stop searching at the length of the file minus the
139: * size of the last block.
140: * The reason for this being that we don't need to do an
141: * incremental hash within the last block---if it
142: * doesn't match, it doesn't match.
143: */
144:
145: end = st->mapsz + 1 - blks->blks[blks->blksz - 1].len;
146: last = st->offs;
147:
148: for ( ; st->offs < end; st->offs++) {
149: blk = blk_find(sess, st->map, st->mapsz,
150: st->offs, blks, path, st->hint);
151: if (blk == NULL)
152: continue;
153:
154: sz = st->offs - last;
155: st->dirty += sz;
156: st->total += sz;
1.15 benno 157: LOG4("%s: flushing %jd B before %zu B block %zu",
1.14 deraadt 158: path, (intmax_t)sz,
159: blk->len, blk->idx);
1.9 florian 160: tok = -(blk->idx + 1);
161:
162: /*
163: * Write the data we have, then follow it with
164: * the tag of the block that matches.
165: */
166:
167: st->curpos = last;
168: st->curlen = st->curpos + sz;
169: st->curtok = tok;
1.10 deraadt 170: assert(st->curtok != 0);
1.9 florian 171: st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
172: st->total += blk->len;
173: st->offs += blk->len;
174: st->hint = blk->idx + 1;
175: return;
1.1 benno 176: }
177:
1.9 florian 178: /* Emit remaining data and send terminator token. */
179:
180: sz = st->mapsz - last;
1.15 benno 181: LOG4("%s: flushing remaining %jd B",
1.14 deraadt 182: path, (intmax_t)sz);
1.1 benno 183:
1.9 florian 184: st->total += sz;
185: st->dirty += sz;
186: st->curpos = last;
187: st->curlen = st->curpos + sz;
188: st->curtok = 0;
189: st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
190: } else {
191: st->curpos = 0;
192: st->curlen = st->mapsz;
193: st->curtok = 0;
194: st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK;
195: st->dirty = st->total = st->mapsz;
1.1 benno 196:
1.15 benno 197: LOG4("%s: flushing whole file %zu B",
1.14 deraadt 198: path, st->mapsz);
1.1 benno 199: }
200: }
201:
202: /*
1.13 benno 203: * Buffer the message from sender to the receiver indicating that the
204: * block set has been received.
1.1 benno 205: * Symmetrises blk_send_ack().
206: */
1.13 benno 207: void
1.16 benno 208: blk_recv_ack(char buf[20], const struct blkset *blocks, int32_t idx)
1.1 benno 209: {
1.8 florian 210: size_t pos = 0, sz;
1.1 benno 211:
1.8 florian 212: sz = sizeof(int32_t) + /* index */
213: sizeof(int32_t) + /* block count */
214: sizeof(int32_t) + /* block length */
215: sizeof(int32_t) + /* checksum length */
216: sizeof(int32_t); /* block remainder */
1.13 benno 217: assert(sz == 20);
1.8 florian 218:
1.16 benno 219: io_buffer_int(buf, &pos, sz, idx);
220: io_buffer_int(buf, &pos, sz, blocks->blksz);
221: io_buffer_int(buf, &pos, sz, blocks->len);
222: io_buffer_int(buf, &pos, sz, blocks->csum);
223: io_buffer_int(buf, &pos, sz, blocks->rem);
1.8 florian 224: assert(pos == sz);
1.1 benno 225: }
226:
227: /*
228: * Read all of the checksums for a file's blocks.
229: * Returns the set of blocks or NULL on failure.
230: */
231: struct blkset *
232: blk_recv(struct sess *sess, int fd, const char *path)
233: {
234: struct blkset *s;
235: int32_t i;
236: size_t j;
237: struct blk *b;
238: off_t offs = 0;
239:
1.4 deraadt 240: if ((s = calloc(1, sizeof(struct blkset))) == NULL) {
1.15 benno 241: ERR("calloc");
1.1 benno 242: return NULL;
243: }
244:
245: /*
246: * The block prologue consists of a few values that we'll need
247: * in reading the individual blocks for this file.
248: * FIXME: read into buffer and unbuffer.
249: */
250:
1.3 deraadt 251: if (!io_read_size(sess, fd, &s->blksz)) {
1.15 benno 252: ERRX1("io_read_size");
1.1 benno 253: goto out;
1.3 deraadt 254: } else if (!io_read_size(sess, fd, &s->len)) {
1.15 benno 255: ERRX1("io_read_size");
1.1 benno 256: goto out;
1.3 deraadt 257: } else if (!io_read_size(sess, fd, &s->csum)) {
1.15 benno 258: ERRX1("io_read_int");
1.1 benno 259: goto out;
1.3 deraadt 260: } else if (!io_read_size(sess, fd, &s->rem)) {
1.15 benno 261: ERRX1("io_read_int");
1.1 benno 262: goto out;
263: } else if (s->rem && s->rem >= s->len) {
1.15 benno 264: ERRX("block remainder is "
1.1 benno 265: "greater than block size");
266: goto out;
267: }
268:
1.15 benno 269: LOG3("%s: read block prologue: %zu blocks of "
1.14 deraadt 270: "%zu B, %zu B remainder, %zu B checksum", path,
271: s->blksz, s->len, s->rem, s->csum);
1.1 benno 272:
273: if (s->blksz) {
274: s->blks = calloc(s->blksz, sizeof(struct blk));
1.4 deraadt 275: if (s->blks == NULL) {
1.15 benno 276: ERR("calloc");
1.1 benno 277: goto out;
278: }
279: }
280:
1.2 benno 281: /*
1.1 benno 282: * Read each block individually.
283: * FIXME: read buffer and unbuffer.
284: */
285:
286: for (j = 0; j < s->blksz; j++) {
287: b = &s->blks[j];
1.3 deraadt 288: if (!io_read_int(sess, fd, &i)) {
1.15 benno 289: ERRX1("io_read_int");
1.1 benno 290: goto out;
291: }
292: b->chksum_short = i;
293:
294: assert(s->csum <= sizeof(b->chksum_long));
1.3 deraadt 295: if (!io_read_buf(sess,
1.1 benno 296: fd, b->chksum_long, s->csum)) {
1.15 benno 297: ERRX1("io_read_buf");
1.1 benno 298: goto out;
299: }
300:
301: /*
302: * If we're the last block, then we're assigned the
303: * remainder of the data.
304: */
305:
306: b->offs = offs;
307: b->idx = j;
308: b->len = (j == (s->blksz - 1) && s->rem) ?
309: s->rem : s->len;
310: offs += b->len;
311:
1.15 benno 312: LOG4("%s: read block %zu, length %zu B",
1.14 deraadt 313: path, b->idx, b->len);
1.1 benno 314: }
315:
316: s->size = offs;
1.15 benno 317: LOG3("%s: read blocks: %zu blocks, %jd B total blocked data",
1.14 deraadt 318: path, s->blksz, (intmax_t)s->size);
1.1 benno 319: return s;
320: out:
1.7 florian 321: free(s->blks);
322: free(s);
1.1 benno 323: return NULL;
324: }
325:
326: /*
327: * Symmetrise blk_recv_ack(), except w/o the leading identifier.
328: * Return zero on failure, non-zero on success.
329: */
330: int
331: blk_send_ack(struct sess *sess, int fd, struct blkset *p)
332: {
333: char buf[16];
334: size_t pos = 0, sz;
335:
336: /* Put the entire send routine into a buffer. */
337:
338: sz = sizeof(int32_t) + /* block count */
339: sizeof(int32_t) + /* block length */
340: sizeof(int32_t) + /* checksum length */
341: sizeof(int32_t); /* block remainder */
342: assert(sz <= sizeof(buf));
343:
1.3 deraadt 344: if (!io_read_buf(sess, fd, buf, sz)) {
1.15 benno 345: ERRX1("io_read_buf");
1.1 benno 346: return 0;
347: }
348:
1.16 benno 349: if (!io_unbuffer_size(buf, &pos, sz, &p->blksz))
1.15 benno 350: ERRX1("io_unbuffer_size");
1.16 benno 351: else if (!io_unbuffer_size(buf, &pos, sz, &p->len))
1.15 benno 352: ERRX1("io_unbuffer_size");
1.16 benno 353: else if (!io_unbuffer_size(buf, &pos, sz, &p->csum))
1.15 benno 354: ERRX1("io_unbuffer_size");
1.16 benno 355: else if (!io_unbuffer_size(buf, &pos, sz, &p->rem))
1.15 benno 356: ERRX1("io_unbuffer_size");
1.1 benno 357: else if (p->len && p->rem >= p->len)
1.15 benno 358: ERRX1("non-zero length is less than remainder");
1.4 deraadt 359: else if (p->csum == 0 || p->csum > 16)
1.15 benno 360: ERRX1("inappropriate checksum length");
1.1 benno 361: else
362: return 1;
363:
364: return 0;
365: }
366:
367: /*
368: * Transmit the metadata for set and blocks.
369: * Return zero on failure, non-zero on success.
370: */
371: int
372: blk_send(struct sess *sess, int fd, size_t idx,
373: const struct blkset *p, const char *path)
374: {
375: char *buf;
376: size_t i, pos = 0, sz;
377: int rc = 0;
378:
379: /* Put the entire send routine into a buffer. */
380:
381: sz = sizeof(int32_t) + /* identifier */
1.2 benno 382: sizeof(int32_t) + /* block count */
383: sizeof(int32_t) + /* block length */
384: sizeof(int32_t) + /* checksum length */
385: sizeof(int32_t) + /* block remainder */
386: p->blksz *
387: (sizeof(int32_t) + /* short checksum */
388: p->csum); /* long checksum */
1.1 benno 389:
1.4 deraadt 390: if ((buf = malloc(sz)) == NULL) {
1.15 benno 391: ERR("malloc");
1.1 benno 392: return 0;
393: }
394:
1.16 benno 395: io_buffer_int(buf, &pos, sz, idx);
396: io_buffer_int(buf, &pos, sz, p->blksz);
397: io_buffer_int(buf, &pos, sz, p->len);
398: io_buffer_int(buf, &pos, sz, p->csum);
399: io_buffer_int(buf, &pos, sz, p->rem);
1.1 benno 400:
401: for (i = 0; i < p->blksz; i++) {
1.16 benno 402: io_buffer_int(buf, &pos,
1.1 benno 403: sz, p->blks[i].chksum_short);
1.16 benno 404: io_buffer_buf(buf, &pos, sz, p->blks[i].chksum_long, p->csum);
1.1 benno 405: }
406:
407: assert(pos == sz);
408:
1.3 deraadt 409: if (!io_write_buf(sess, fd, buf, sz)) {
1.15 benno 410: ERRX1("io_write_buf");
1.1 benno 411: goto out;
412: }
413:
1.15 benno 414: LOG3("%s: sent block prologue: %zu blocks of %zu B, "
1.14 deraadt 415: "%zu B remainder, %zu B checksum",
416: path, p->blksz, p->len, p->rem, p->csum);
1.1 benno 417: rc = 1;
418: out:
419: free(buf);
420: return rc;
421: }