Annotation of src/usr.bin/rsync/blocks.c, Revision 1.22
1.22 ! claudio 1: /* $OpenBSD: blocks.c,v 1.21 2021/11/03 14:42:12 deraadt Exp $ */
1.1 benno 2: /*
3: * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
1.18 florian 17: #include <sys/queue.h>
1.1 benno 18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <endian.h>
22: #include <errno.h>
23: #include <inttypes.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <unistd.h>
28:
1.6 tb 29: #include <openssl/md4.h>
30:
1.1 benno 31: #include "extern.h"
32:
1.18 florian 33: struct blkhash {
34: const struct blk *blk;
1.19 deraadt 35: TAILQ_ENTRY(blkhash) entries;
1.18 florian 36: };
37:
38: TAILQ_HEAD(blkhashq, blkhash);
39:
40: /*
41: * Table used by the sender for looking up blocks.
42: * The blocks are those sent by the receiver; we're looking up using
43: * hashes computed from our local file.
44: */
45: struct blktab {
46: struct blkhashq *q; /* entries in the hashtable */
47: size_t qsz; /* size of the hashtable */
48: struct blkhash *blks; /* pre-allocated hashtable entries */
49: };
50:
51: /*
52: * This is the number of buckets in the hashtable.
53: * Use the same that GPL rsync uses.
54: * (It should be dynamic?)
55: */
56: #define BLKTAB_SZ 65536
57:
58: /*
59: * Initialise an empty hashtable with BLKTAB_SZ entries in it.
60: * Populate it for each file with blkhash_set.
61: * When we've processed all files, call blkhash_free.
62: * Returns NULL on allocation failure.
63: */
64: struct blktab *
65: blkhash_alloc(void)
66: {
67: struct blktab *p;
68:
69: if ((p = calloc(1, sizeof(struct blktab))) == NULL) {
70: ERR("calloc");
71: return NULL;
72: }
73: p->qsz = BLKTAB_SZ;
74: p->q = calloc(p->qsz, sizeof(struct blkhashq));
75: if (p->q == NULL) {
76: ERR("calloc");
77: free(p);
78: return NULL;
79: }
80: return p;
81: }
82:
83: /*
84: * Populate the hashtable with an incoming file's blocks.
85: * This will clear out any existing hashed data.
86: * Returns zero on allocation failure, non-zero otherwise.
87: */
88: int
89: blkhash_set(struct blktab *p, const struct blkset *bset)
90: {
91: size_t i, idx;
92:
93: if (bset == NULL)
94: return 1;
95:
96: /* Wipe clean the table. */
97:
98: for (i = 0; i < p->qsz; i++)
99: TAILQ_INIT(&p->q[i]);
100:
101: /* Fill in the hashtable. */
102:
1.19 deraadt 103: p->blks = reallocarray(p->blks, bset->blksz, sizeof(struct blkhash));
1.18 florian 104: if (p->blks == NULL) {
105: ERR("reallocarray");
106: return 0;
107: }
108: for (i = 0; i < bset->blksz; i++) {
109: p->blks[i].blk = &bset->blks[i];
110: idx = bset->blks[i].chksum_short % p->qsz;
111: assert(idx < p->qsz);
112: TAILQ_INSERT_TAIL(&p->q[idx], &p->blks[i], entries);
113: }
114:
115: return 1;
116: }
117:
118: /*
119: * Free as allocated with blkhash_alloc().
120: */
121: void
122: blkhash_free(struct blktab *p)
123: {
124:
125: free(p->blks);
126: free(p);
127: }
128:
1.1 benno 129: /*
130: * From our current position of "offs" in buffer "buf" of total size
131: * "size", see if we can find a matching block in our list of blocks.
132: * The "hint" refers to the block that *might* work.
133: * Returns the blk or NULL if no matching block was found.
134: */
1.18 florian 135: static const struct blk *
136: blk_find(struct sess *sess, struct blkstat *st,
137: const struct blkset *blks, const char *path, int recomp)
1.1 benno 138: {
139: unsigned char md[MD4_DIGEST_LENGTH];
140: uint32_t fhash;
141: off_t remain, osz;
142: int have_md = 0;
1.18 florian 143: char *map;
144: const struct blkhashq *q;
145: const struct blkhash *ent;
146:
147: remain = st->mapsz - st->offs;
148: assert(remain);
149: osz = MINIMUM(remain, (off_t)blks->len);
1.1 benno 150:
151: /*
1.18 florian 152: * First, compute our fast hash the hard way (if we're
153: * reentering this function from a previous block match, or the
154: * first time) or from our existing s1 and s2 values.
1.1 benno 155: */
156:
1.18 florian 157: if (!recomp) {
158: fhash = (st->s1 & 0xFFFF) | (st->s2 << 16);
159: } else {
160: fhash = hash_fast(st->map + st->offs, (size_t)osz);
161: st->s1 = fhash & 0xFFFF;
162: st->s2 = fhash >> 16;
163: }
1.1 benno 164:
165: /*
166: * Start with our match hint.
167: * This just runs the fast and slow check with the hint.
168: */
169:
1.18 florian 170: if (st->hint < blks->blksz &&
171: fhash == blks->blks[st->hint].chksum_short &&
172: (size_t)osz == blks->blks[st->hint].len) {
173: hash_slow(st->map + st->offs, (size_t)osz, md, sess);
1.1 benno 174: have_md = 1;
1.18 florian 175: if (memcmp(md, blks->blks[st->hint].chksum_long, blks->csum) == 0) {
1.15 benno 176: LOG4("%s: found matching hinted match: "
1.14 deraadt 177: "position %jd, block %zu (position %jd, size %zu)",
178: path,
1.18 florian 179: (intmax_t)st->offs, blks->blks[st->hint].idx,
180: (intmax_t)blks->blks[st->hint].offs,
181: blks->blks[st->hint].len);
182: return &blks->blks[st->hint];
1.1 benno 183: }
184: }
185:
186: /*
1.18 florian 187: * Look for the fast hash modulus in our hashtable, filter for
188: * those matching the full hash and length, then move to the
189: * slow hash.
190: * The slow hash is computed only once.
1.1 benno 191: */
192:
1.18 florian 193: q = &st->blktab->q[fhash % st->blktab->qsz];
194:
195: TAILQ_FOREACH(ent, q, entries) {
196: if (fhash != ent->blk->chksum_short ||
197: (size_t)osz != ent->blk->len)
1.1 benno 198: continue;
199:
1.15 benno 200: LOG4("%s: found matching fast match: "
1.14 deraadt 201: "position %jd, block %zu (position %jd, size %zu)",
1.18 florian 202: path, (intmax_t)st->offs, ent->blk->idx,
203: (intmax_t)ent->blk->offs, ent->blk->len);
1.1 benno 204:
1.4 deraadt 205: if (have_md == 0) {
1.18 florian 206: hash_slow(st->map + st->offs, (size_t)osz, md, sess);
1.1 benno 207: have_md = 1;
208: }
209:
1.18 florian 210: if (memcmp(md, ent->blk->chksum_long, blks->csum))
1.1 benno 211: continue;
212:
1.15 benno 213: LOG4("%s: sender verifies slow match", path);
1.18 florian 214: return ent->blk;
1.1 benno 215: }
216:
1.18 florian 217: /*
218: * Adjust our partial sums for the hashing.
219: * We first remove the first byte from the sum.
220: * We then, if we have space, add the first byte of the next
221: * block in the sequence.
222: */
223:
224: map = st->map + st->offs;
225: st->s1 -= map[0];
226: st->s2 -= osz * map[0];
227:
228: if (osz < remain) {
229: st->s1 += map[osz];
230: st->s2 += st->s1;
1.19 deraadt 231: }
1.18 florian 232:
1.1 benno 233: return NULL;
234: }
235:
236: /*
237: * Given a local file "path" and the blocks created by a remote machine,
238: * find out which blocks of our file they don't have and send them.
1.7 florian 239: * This function is reentrant: it must be called while there's still
240: * data to send.
1.1 benno 241: */
1.9 florian 242: void
243: blk_match(struct sess *sess, const struct blkset *blks,
1.7 florian 244: const char *path, struct blkstat *st)
1.1 benno 245: {
1.18 florian 246: off_t last, end, sz;
247: int32_t tok;
248: size_t i;
249: const struct blk *blk;
1.1 benno 250:
251: /*
252: * If the file's empty or we don't have any blocks from the
253: * sender, then simply send the whole file.
254: * Otherwise, run the hash matching routine and send raw chunks
255: * and subsequent matching tokens.
256: */
257:
1.7 florian 258: if (st->mapsz && blks->blksz) {
1.9 florian 259: /*
260: * Stop searching at the length of the file minus the
261: * size of the last block.
262: * The reason for this being that we don't need to do an
263: * incremental hash within the last block---if it
264: * doesn't match, it doesn't match.
265: */
266:
267: end = st->mapsz + 1 - blks->blks[blks->blksz - 1].len;
268: last = st->offs;
269:
1.18 florian 270: for (i = 0; st->offs < end; st->offs++, i++) {
271: blk = blk_find(sess, st, blks, path, i == 0);
1.9 florian 272: if (blk == NULL)
273: continue;
274:
275: sz = st->offs - last;
276: st->dirty += sz;
277: st->total += sz;
1.15 benno 278: LOG4("%s: flushing %jd B before %zu B block %zu",
1.14 deraadt 279: path, (intmax_t)sz,
280: blk->len, blk->idx);
1.9 florian 281: tok = -(blk->idx + 1);
282:
1.22 ! claudio 283: hash_file_buf(&st->ctx, st->map + last, sz + blk->len);
! 284:
1.9 florian 285: /*
286: * Write the data we have, then follow it with
287: * the tag of the block that matches.
288: */
289:
290: st->curpos = last;
291: st->curlen = st->curpos + sz;
292: st->curtok = tok;
1.10 deraadt 293: assert(st->curtok != 0);
1.9 florian 294: st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
295: st->total += blk->len;
296: st->offs += blk->len;
297: st->hint = blk->idx + 1;
1.22 ! claudio 298:
1.9 florian 299: return;
1.1 benno 300: }
301:
1.9 florian 302: /* Emit remaining data and send terminator token. */
303:
304: sz = st->mapsz - last;
1.15 benno 305: LOG4("%s: flushing remaining %jd B",
1.14 deraadt 306: path, (intmax_t)sz);
1.1 benno 307:
1.9 florian 308: st->total += sz;
309: st->dirty += sz;
310: st->curpos = last;
311: st->curlen = st->curpos + sz;
312: st->curtok = 0;
313: st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
1.22 ! claudio 314:
! 315: hash_file_buf(&st->ctx, st->map + st->curpos, sz);
1.9 florian 316: } else {
317: st->curpos = 0;
318: st->curlen = st->mapsz;
319: st->curtok = 0;
320: st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK;
321: st->dirty = st->total = st->mapsz;
1.22 ! claudio 322:
! 323: hash_file_buf(&st->ctx, st->map, st->mapsz);
1.1 benno 324:
1.15 benno 325: LOG4("%s: flushing whole file %zu B",
1.14 deraadt 326: path, st->mapsz);
1.1 benno 327: }
328: }
329:
330: /*
1.13 benno 331: * Buffer the message from sender to the receiver indicating that the
332: * block set has been received.
1.1 benno 333: * Symmetrises blk_send_ack().
334: */
1.13 benno 335: void
1.16 benno 336: blk_recv_ack(char buf[20], const struct blkset *blocks, int32_t idx)
1.1 benno 337: {
1.8 florian 338: size_t pos = 0, sz;
1.1 benno 339:
1.8 florian 340: sz = sizeof(int32_t) + /* index */
1.21 deraadt 341: sizeof(int32_t) + /* block count */
342: sizeof(int32_t) + /* block length */
343: sizeof(int32_t) + /* checksum length */
344: sizeof(int32_t); /* block remainder */
1.13 benno 345: assert(sz == 20);
1.8 florian 346:
1.16 benno 347: io_buffer_int(buf, &pos, sz, idx);
348: io_buffer_int(buf, &pos, sz, blocks->blksz);
349: io_buffer_int(buf, &pos, sz, blocks->len);
350: io_buffer_int(buf, &pos, sz, blocks->csum);
351: io_buffer_int(buf, &pos, sz, blocks->rem);
1.8 florian 352: assert(pos == sz);
1.1 benno 353: }
354:
355: /*
356: * Read all of the checksums for a file's blocks.
357: * Returns the set of blocks or NULL on failure.
358: */
359: struct blkset *
360: blk_recv(struct sess *sess, int fd, const char *path)
361: {
362: struct blkset *s;
363: int32_t i;
364: size_t j;
365: struct blk *b;
366: off_t offs = 0;
367:
1.4 deraadt 368: if ((s = calloc(1, sizeof(struct blkset))) == NULL) {
1.15 benno 369: ERR("calloc");
1.1 benno 370: return NULL;
371: }
372:
373: /*
374: * The block prologue consists of a few values that we'll need
375: * in reading the individual blocks for this file.
376: * FIXME: read into buffer and unbuffer.
377: */
378:
1.3 deraadt 379: if (!io_read_size(sess, fd, &s->blksz)) {
1.15 benno 380: ERRX1("io_read_size");
1.1 benno 381: goto out;
1.3 deraadt 382: } else if (!io_read_size(sess, fd, &s->len)) {
1.15 benno 383: ERRX1("io_read_size");
1.1 benno 384: goto out;
1.3 deraadt 385: } else if (!io_read_size(sess, fd, &s->csum)) {
1.15 benno 386: ERRX1("io_read_int");
1.1 benno 387: goto out;
1.3 deraadt 388: } else if (!io_read_size(sess, fd, &s->rem)) {
1.15 benno 389: ERRX1("io_read_int");
1.1 benno 390: goto out;
391: } else if (s->rem && s->rem >= s->len) {
1.15 benno 392: ERRX("block remainder is "
1.1 benno 393: "greater than block size");
394: goto out;
395: }
396:
1.15 benno 397: LOG3("%s: read block prologue: %zu blocks of "
1.14 deraadt 398: "%zu B, %zu B remainder, %zu B checksum", path,
399: s->blksz, s->len, s->rem, s->csum);
1.1 benno 400:
401: if (s->blksz) {
402: s->blks = calloc(s->blksz, sizeof(struct blk));
1.4 deraadt 403: if (s->blks == NULL) {
1.15 benno 404: ERR("calloc");
1.1 benno 405: goto out;
406: }
407: }
408:
1.2 benno 409: /*
1.1 benno 410: * Read each block individually.
411: * FIXME: read buffer and unbuffer.
412: */
413:
414: for (j = 0; j < s->blksz; j++) {
415: b = &s->blks[j];
1.3 deraadt 416: if (!io_read_int(sess, fd, &i)) {
1.15 benno 417: ERRX1("io_read_int");
1.1 benno 418: goto out;
419: }
420: b->chksum_short = i;
421:
422: assert(s->csum <= sizeof(b->chksum_long));
1.3 deraadt 423: if (!io_read_buf(sess,
1.1 benno 424: fd, b->chksum_long, s->csum)) {
1.15 benno 425: ERRX1("io_read_buf");
1.1 benno 426: goto out;
427: }
428:
429: /*
430: * If we're the last block, then we're assigned the
431: * remainder of the data.
432: */
433:
434: b->offs = offs;
435: b->idx = j;
436: b->len = (j == (s->blksz - 1) && s->rem) ?
437: s->rem : s->len;
438: offs += b->len;
439:
1.15 benno 440: LOG4("%s: read block %zu, length %zu B",
1.14 deraadt 441: path, b->idx, b->len);
1.1 benno 442: }
443:
444: s->size = offs;
1.15 benno 445: LOG3("%s: read blocks: %zu blocks, %jd B total blocked data",
1.14 deraadt 446: path, s->blksz, (intmax_t)s->size);
1.1 benno 447: return s;
448: out:
1.7 florian 449: free(s->blks);
450: free(s);
1.1 benno 451: return NULL;
452: }
453:
454: /*
455: * Symmetrise blk_recv_ack(), except w/o the leading identifier.
456: * Return zero on failure, non-zero on success.
457: */
458: int
459: blk_send_ack(struct sess *sess, int fd, struct blkset *p)
460: {
461: char buf[16];
462: size_t pos = 0, sz;
463:
464: /* Put the entire send routine into a buffer. */
465:
466: sz = sizeof(int32_t) + /* block count */
1.21 deraadt 467: sizeof(int32_t) + /* block length */
468: sizeof(int32_t) + /* checksum length */
469: sizeof(int32_t); /* block remainder */
1.1 benno 470: assert(sz <= sizeof(buf));
471:
1.3 deraadt 472: if (!io_read_buf(sess, fd, buf, sz)) {
1.15 benno 473: ERRX1("io_read_buf");
1.1 benno 474: return 0;
475: }
476:
1.16 benno 477: if (!io_unbuffer_size(buf, &pos, sz, &p->blksz))
1.15 benno 478: ERRX1("io_unbuffer_size");
1.16 benno 479: else if (!io_unbuffer_size(buf, &pos, sz, &p->len))
1.15 benno 480: ERRX1("io_unbuffer_size");
1.16 benno 481: else if (!io_unbuffer_size(buf, &pos, sz, &p->csum))
1.15 benno 482: ERRX1("io_unbuffer_size");
1.16 benno 483: else if (!io_unbuffer_size(buf, &pos, sz, &p->rem))
1.15 benno 484: ERRX1("io_unbuffer_size");
1.1 benno 485: else if (p->len && p->rem >= p->len)
1.15 benno 486: ERRX1("non-zero length is less than remainder");
1.4 deraadt 487: else if (p->csum == 0 || p->csum > 16)
1.15 benno 488: ERRX1("inappropriate checksum length");
1.1 benno 489: else
490: return 1;
491:
492: return 0;
493: }
494:
495: /*
496: * Transmit the metadata for set and blocks.
497: * Return zero on failure, non-zero on success.
498: */
499: int
500: blk_send(struct sess *sess, int fd, size_t idx,
501: const struct blkset *p, const char *path)
502: {
503: char *buf;
504: size_t i, pos = 0, sz;
505: int rc = 0;
506:
507: /* Put the entire send routine into a buffer. */
508:
509: sz = sizeof(int32_t) + /* identifier */
1.2 benno 510: sizeof(int32_t) + /* block count */
511: sizeof(int32_t) + /* block length */
512: sizeof(int32_t) + /* checksum length */
513: sizeof(int32_t) + /* block remainder */
514: p->blksz *
515: (sizeof(int32_t) + /* short checksum */
516: p->csum); /* long checksum */
1.1 benno 517:
1.4 deraadt 518: if ((buf = malloc(sz)) == NULL) {
1.15 benno 519: ERR("malloc");
1.1 benno 520: return 0;
521: }
522:
1.16 benno 523: io_buffer_int(buf, &pos, sz, idx);
524: io_buffer_int(buf, &pos, sz, p->blksz);
525: io_buffer_int(buf, &pos, sz, p->len);
526: io_buffer_int(buf, &pos, sz, p->csum);
527: io_buffer_int(buf, &pos, sz, p->rem);
1.1 benno 528:
529: for (i = 0; i < p->blksz; i++) {
1.19 deraadt 530: io_buffer_int(buf, &pos, sz, p->blks[i].chksum_short);
1.16 benno 531: io_buffer_buf(buf, &pos, sz, p->blks[i].chksum_long, p->csum);
1.1 benno 532: }
533:
534: assert(pos == sz);
535:
1.3 deraadt 536: if (!io_write_buf(sess, fd, buf, sz)) {
1.15 benno 537: ERRX1("io_write_buf");
1.1 benno 538: goto out;
539: }
540:
1.15 benno 541: LOG3("%s: sent block prologue: %zu blocks of %zu B, "
1.14 deraadt 542: "%zu B remainder, %zu B checksum",
543: path, p->blksz, p->len, p->rem, p->csum);
1.1 benno 544: rc = 1;
545: out:
546: free(buf);
547: return rc;
548: }