Annotation of src/usr.bin/rsync/downloader.c, Revision 1.10
1.10 ! florian 1: /* $Id: downloader.c,v 1.9 2019/02/14 18:29:08 florian Exp $ */
1.1 benno 2: /*
3: * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/mman.h>
18: #include <sys/stat.h>
19:
20: #include <assert.h>
21: #include <errno.h>
22: #include <fcntl.h>
23: #include <inttypes.h>
24: #include <math.h>
25: #include <poll.h>
26: #include <stdio.h>
27: #include <stdlib.h>
28: #include <string.h>
29: #include <time.h>
30: #include <unistd.h>
31:
1.8 tb 32: #include <openssl/md4.h>
33:
1.1 benno 34: #include "extern.h"
35:
/*
 * Size in bytes of the pre-write buffer (1 MB) used to batch writes
 * to the output file.
 * Setting this to zero disables the pre-write buffer entirely.
 * (It doesn't affect performance much either way.)
 */
#define	OBUF_SIZE	(1024 * 1024)
42:
/*
 * Phases of a single file download, in the order they are entered.
 */
enum	downloadst {
	DOWNLOAD_READ_NEXT = 0,		/* waiting for the next file index */
	DOWNLOAD_READ_LOCAL = 1,	/* origin file opened (or absent) */
	DOWNLOAD_READ_REMOTE = 2	/* reading data/tokens from sender */
};
48:
49: /*
50: * Like struct upload, but used to keep track of what we're downloading.
51: * This also is managed by the receiver process.
52: */
53: struct download {
54: enum downloadst state; /* state of affairs */
1.2 benno 55: size_t idx; /* index of current file */
1.1 benno 56: struct blkset blk; /* its blocks */
57: void *map; /* mmap of current file */
58: size_t mapsz; /* length of mapsz */
59: int ofd; /* open origin file */
60: int fd; /* open output file */
61: char *fname; /* output filename */
1.2 benno 62: MD4_CTX ctx; /* current hashing context */
1.1 benno 63: off_t downloaded; /* total downloaded */
64: off_t total; /* total in file */
65: const struct flist *fl; /* file list */
66: size_t flsz; /* size of file list */
67: int rootfd; /* destination directory */
68: int fdin; /* read descriptor from sender */
69: char *obuf; /* pre-write buffer */
70: size_t obufsz; /* current size of obuf */
71: size_t obufmax; /* max size we'll wbuffer */
72: };
73:
74:
75: /*
76: * Simply log the filename.
77: */
78: static void
1.2 benno 79: log_file(struct sess *sess,
1.1 benno 80: const struct download *dl, const struct flist *f)
81: {
82: float frac, tot = dl->total;
83: int prec = 0;
84: const char *unit = "B";
85:
86: if (sess->opts->server)
87: return;
88:
1.2 benno 89: frac = 0 == dl->total ? 100.0 :
1.1 benno 90: 100.0 * dl->downloaded / dl->total;
91:
92: if (dl->total > 1024 * 1024 * 1024) {
93: tot = dl->total / (1024. * 1024. * 1024.);
94: prec = 3;
95: unit = "GB";
96: } else if (dl->total > 1024 * 1024) {
97: tot = dl->total / (1024. * 1024.);
98: prec = 2;
99: unit = "MB";
100: } else if (dl->total > 1024) {
101: tot = dl->total / 1024.;
102: prec = 1;
103: unit = "KB";
104: }
105:
1.2 benno 106: LOG1(sess, "%s (%.*f %s, %.1f%% downloaded)",
1.1 benno 107: f->path, prec, tot, unit, frac);
108: }
109:
110: /*
111: * Reinitialise a download context w/o overwriting the persistent parts
112: * of the structure (like p->fl or p->flsz) for index "idx".
113: * The MD4 context is pre-seeded.
114: */
115: static void
116: download_reinit(struct sess *sess, struct download *p, size_t idx)
117: {
118: int32_t seed = htole32(sess->seed);
119:
1.4 deraadt 120: assert(p->state == DOWNLOAD_READ_NEXT);
1.1 benno 121:
122: p->idx = idx;
123: memset(&p->blk, 0, sizeof(struct blkset));
124: p->map = MAP_FAILED;
125: p->mapsz = 0;
126: p->ofd = -1;
127: p->fd = -1;
128: p->fname = NULL;
129: MD4_Init(&p->ctx);
130: p->downloaded = p->total = 0;
131: /* Don't touch p->fl. */
132: /* Don't touch p->flsz. */
133: /* Don't touch p->rootfd. */
134: /* Don't touch p->fdin. */
135: MD4_Update(&p->ctx, &seed, sizeof(int32_t));
136: }
137:
138: /*
139: * Free a download context.
140: * If "cleanup" is non-zero, we also try to clean up the temporary file,
141: * assuming that it has been opened in p->fd.
142: */
143: static void
144: download_cleanup(struct download *p, int cleanup)
145: {
146:
1.4 deraadt 147: if (p->map != MAP_FAILED) {
1.1 benno 148: assert(p->mapsz);
149: munmap(p->map, p->mapsz);
150: p->map = MAP_FAILED;
151: p->mapsz = 0;
152: }
1.4 deraadt 153: if (p->ofd != -1) {
1.1 benno 154: close(p->ofd);
155: p->ofd = -1;
156: }
1.4 deraadt 157: if (p->fd != -1) {
1.1 benno 158: close(p->fd);
1.4 deraadt 159: if (cleanup && p->fname != NULL)
1.1 benno 160: unlinkat(p->rootfd, p->fname, 0);
161: p->fd = -1;
162: }
163: free(p->fname);
164: p->fname = NULL;
165: p->state = DOWNLOAD_READ_NEXT;
166: }
167:
168: /*
169: * Initial allocation of the download object using the file list "fl" of
170: * size "flsz", the destination "rootfd", and the sender read "fdin".
171: * Returns NULL on allocation failure.
172: * On success, download_free() must be called with the pointer.
173: */
174: struct download *
1.2 benno 175: download_alloc(struct sess *sess, int fdin,
1.1 benno 176: const struct flist *fl, size_t flsz, int rootfd)
177: {
178: struct download *p;
179:
1.4 deraadt 180: if ((p = malloc(sizeof(struct download))) == NULL) {
1.1 benno 181: ERR(sess, "malloc");
182: return NULL;
183: }
184:
185: p->state = DOWNLOAD_READ_NEXT;
186: p->fl = fl;
187: p->flsz = flsz;
188: p->rootfd = rootfd;
189: p->fdin = fdin;
190: download_reinit(sess, p, 0);
191: p->obufsz = 0;
192: p->obuf = NULL;
193: p->obufmax = OBUF_SIZE;
1.4 deraadt 194: if (p->obufmax && (p->obuf = malloc(p->obufmax)) == NULL) {
1.1 benno 195: ERR(sess, "malloc");
196: free(p);
197: return NULL;
198: }
199: return p;
200: }
201:
202: /*
203: * Perform all cleanups (including removing stray files) and free.
204: * Passing a NULL to this function is ok.
205: */
206: void
207: download_free(struct download *p)
208: {
209:
1.4 deraadt 210: if (p == NULL)
1.1 benno 211: return;
212: download_cleanup(p, 1);
213: free(p->obuf);
214: free(p);
215: }
216:
217: /*
218: * Optimisation: instead of dumping directly into the output file, keep
219: * a buffer and write as much as we can into the buffer.
220: * That way, we can avoid calling write() too much, and instead call it
221: * with big buffers.
222: * To flush the buffer w/o changing it, pass 0 as "sz".
223: * Returns zero on failure, non-zero on success.
224: */
225: static int
1.2 benno 226: buf_copy(struct sess *sess,
1.1 benno 227: const char *buf, size_t sz, struct download *p)
228: {
229: size_t rem, tocopy;
230: ssize_t ssz;
231:
232: assert(p->obufsz <= p->obufmax);
233:
1.2 benno 234: /*
1.1 benno 235: * Copy as much as we can.
236: * If we've copied everything, exit.
237: * If we have no pre-write buffer (obufmax of zero), this never
238: * gets called, so we never buffer anything.
239: */
240:
241: if (sz && p->obufsz < p->obufmax) {
1.4 deraadt 242: assert(p->obuf != NULL);
1.1 benno 243: rem = p->obufmax - p->obufsz;
244: assert(rem > 0);
245: tocopy = rem < sz ? rem : sz;
246: memcpy(p->obuf + p->obufsz, buf, tocopy);
247: sz -= tocopy;
248: buf += tocopy;
249: p->obufsz += tocopy;
250: assert(p->obufsz <= p->obufmax);
1.4 deraadt 251: if (sz == 0)
1.1 benno 252: return 1;
253: }
254:
255: /* Drain the main buffer. */
256:
257: if (p->obufsz) {
258: assert(p->obufmax);
259: assert(p->obufsz <= p->obufmax);
1.4 deraadt 260: assert(p->obuf != NULL);
1.1 benno 261: if ((ssz = write(p->fd, p->obuf, p->obufsz)) < 0) {
262: ERR(sess, "%s: write", p->fname);
263: return 0;
264: } else if ((size_t)ssz != p->obufsz) {
265: ERRX(sess, "%s: short write", p->fname);
266: return 0;
267: }
268: p->obufsz = 0;
269: }
270:
1.2 benno 271: /*
1.1 benno 272: * Now drain anything left.
273: * If we have no pre-write buffer, this is it.
274: */
275:
276: if (sz) {
277: if ((ssz = write(p->fd, buf, sz)) < 0) {
278: ERR(sess, "%s: write", p->fname);
279: return 0;
280: } else if ((size_t)ssz != sz) {
281: ERRX(sess, "%s: short write", p->fname);
282: return 0;
283: }
284: }
285: return 1;
286: }
287:
288: /*
289: * The downloader waits on a file the sender is going to give us, opens
290: * and mmaps the existing file, opens a temporary file, dumps the file
291: * (or metadata) into the temporary file, then renames.
292: * This happens in several possible phases to avoid blocking.
293: * Returns <0 on failure, 0 on no more data (end of phase), >0 on
294: * success (more data to be read from the sender).
295: */
296: int
297: rsync_downloader(struct download *p, struct sess *sess, int *ofd)
298: {
299: int32_t idx, rawtok;
300: const struct flist *f;
1.10 ! florian 301: size_t sz, tok;
1.1 benno 302: mode_t perm;
1.2 benno 303: struct stat st;
1.1 benno 304: char *buf = NULL;
1.2 benno 305: unsigned char ourmd[MD4_DIGEST_LENGTH],
1.1 benno 306: md[MD4_DIGEST_LENGTH];
307:
308: /*
309: * If we don't have a download already in session, then the next
310: * one is coming in.
311: * Read either the stop (phase) signal from the sender or block
312: * metadata, in which case we open our file and wait for data.
313: */
314:
1.4 deraadt 315: if (p->state == DOWNLOAD_READ_NEXT) {
1.3 deraadt 316: if (!io_read_int(sess, p->fdin, &idx)) {
1.1 benno 317: ERRX1(sess, "io_read_int");
318: return -1;
319: } else if (idx >= 0 && (size_t)idx >= p->flsz) {
320: ERRX(sess, "index out of bounds");
321: return -1;
322: } else if (idx < 0) {
323: LOG3(sess, "downloader: phase complete");
324: return 0;
325: }
326:
327: /* Short-circuit: dry_run mode does nothing. */
328:
329: if (sess->opts->dry_run)
330: return 1;
331:
1.2 benno 332: /*
1.1 benno 333: * Now get our block information.
334: * This is all we'll need to reconstruct the file from
335: * the map, as block sizes are regular.
336: */
337:
338: download_reinit(sess, p, idx);
1.3 deraadt 339: if (!blk_send_ack(sess, p->fdin, &p->blk)) {
1.1 benno 340: ERRX1(sess, "blk_send_ack");
341: goto out;
342: }
343:
1.2 benno 344: /*
1.1 benno 345: * Next, we want to open the existing file for using as
346: * block input.
347: * We do this in a non-blocking way, so if the open
348: * succeeds, then we'll go reentrant til the file is
349: * readable and we can mmap() it.
350: * Set the file descriptor that we want to wait for.
351: */
352:
353: p->state = DOWNLOAD_READ_LOCAL;
354: f = &p->fl[idx];
1.4 deraadt 355: p->ofd = openat(p->rootfd, f->path, O_RDONLY | O_NONBLOCK, 0);
1.1 benno 356:
1.4 deraadt 357: if (p->ofd == -1 && errno != ENOENT) {
1.1 benno 358: ERR(sess, "%s: openat", f->path);
359: goto out;
1.4 deraadt 360: } else if (p->ofd != -1) {
1.1 benno 361: *ofd = p->ofd;
362: return 1;
363: }
364:
365: /* Fall-through: there's no file. */
366: }
367:
368: /*
369: * At this point, the server is sending us data and we want to
370: * hoover it up as quickly as possible or we'll deadlock.
371: * We want to be pulling off of f->fdin as quickly as possible,
372: * so perform as much buffering as we can.
373: */
374:
375: f = &p->fl[p->idx];
376:
377: /*
378: * Next in sequence: we have an open download session but
379: * haven't created our temporary file.
380: * This means that we've already opened (or tried to open) the
381: * original file in a nonblocking way, and we can map it.
382: */
383:
1.4 deraadt 384: if (p->state == DOWNLOAD_READ_LOCAL) {
385: assert(p->fname == NULL);
1.1 benno 386:
1.2 benno 387: /*
1.1 benno 388: * Try to fstat() the file descriptor if valid and make
389: * sure that we're still a regular file.
390: * Then, if it has non-zero size, mmap() it for hashing.
391: */
392:
1.4 deraadt 393: if (p->ofd != -1 &&
394: fstat(p->ofd, &st) == -1) {
1.1 benno 395: ERR(sess, "%s: fstat", f->path);
396: goto out;
1.4 deraadt 397: } else if (p->ofd != -1 && !S_ISREG(st.st_mode)) {
1.1 benno 398: WARNX(sess, "%s: not regular", f->path);
399: goto out;
400: }
401:
1.4 deraadt 402: if (p->ofd != -1 && st.st_size > 0) {
1.1 benno 403: p->mapsz = st.st_size;
1.2 benno 404: p->map = mmap(NULL, p->mapsz,
1.1 benno 405: PROT_READ, MAP_SHARED, p->ofd, 0);
1.4 deraadt 406: if (p->map == MAP_FAILED) {
1.1 benno 407: ERR(sess, "%s: mmap", f->path);
408: goto out;
409: }
410: }
411:
412: /* Success either way: we don't need this. */
413:
414: *ofd = -1;
415:
1.10 ! florian 416: /* Create the temporary file. */
1.1 benno 417:
1.10 ! florian 418: if (mktemplate(&p->fname, f->path, sess->opts->recursive)
! 419: == -1) {
! 420: ERR(sess, "asprintf");
! 421: goto out;
1.1 benno 422: }
1.10 ! florian 423:
! 424: if ((p->fd = mkstempat(p->rootfd, p->fname)) == -1) {
! 425: ERR(sess, "%s: openat", p->fname);
1.1 benno 426: goto out;
427: }
428:
1.2 benno 429: /*
1.1 benno 430: * Inherit permissions from the source file if we're new
431: * or specifically told with -p.
432: */
433:
1.3 deraadt 434: if (!sess->opts->preserve_perms)
1.1 benno 435: perm = -1 == p->ofd ? f->st.mode : st.st_mode;
436: else
437: perm = f->st.mode;
438:
1.10 ! florian 439: if (fchmod(p->fd, perm) == -1) {
! 440: ERR(sess, "%s: fchmod", p->fname);
! 441: (void)unlinkat(p->rootfd, p->fname, 0);
1.1 benno 442: goto out;
443: }
444:
1.2 benno 445: /*
1.1 benno 446: * FIXME: we can technically wait until the temporary
447: * file is writable, but since it's guaranteed to be
448: * empty, I don't think this is a terribly expensive
449: * operation as it doesn't involve reading the file into
450: * memory beforehand.
451: */
452:
453: LOG3(sess, "%s: temporary: %s", f->path, p->fname);
454: p->state = DOWNLOAD_READ_REMOTE;
455: return 1;
456: }
457:
458: /*
459: * This matches the sequence in blk_flush().
460: * If we've gotten here, then we have a possibly-open map file
461: * (not for new files) and our temporary file is writable.
462: * We read the size/token, then optionally the data.
463: * The size >0 for reading data, 0 for no more data, and <0 for
464: * a token indicator.
465: */
466:
1.4 deraadt 467: assert(p->state == DOWNLOAD_READ_REMOTE);
468: assert(p->fname != NULL);
469: assert(p->fd != -1);
470: assert(p->fdin != -1);
1.1 benno 471:
1.3 deraadt 472: if (!io_read_int(sess, p->fdin, &rawtok)) {
1.1 benno 473: ERRX1(sess, "io_read_int");
474: goto out;
1.2 benno 475: }
1.1 benno 476:
477: if (rawtok > 0) {
478: sz = rawtok;
1.4 deraadt 479: if ((buf = malloc(sz)) == NULL) {
1.1 benno 480: ERR(sess, "realloc");
481: goto out;
482: }
1.3 deraadt 483: if (!io_read_buf(sess, p->fdin, buf, sz)) {
1.1 benno 484: ERRX1(sess, "io_read_int");
485: goto out;
1.3 deraadt 486: } else if (!buf_copy(sess, buf, sz, p)) {
1.1 benno 487: ERRX1(sess, "buf_copy");
488: goto out;
489: }
490: p->total += sz;
491: p->downloaded += sz;
492: LOG4(sess, "%s: received %zu B block", p->fname, sz);
493: MD4_Update(&p->ctx, buf, sz);
494: free(buf);
495: return 1;
496: } else if (rawtok < 0) {
497: tok = -rawtok - 1;
498: if (tok >= p->blk.blksz) {
499: ERRX(sess, "%s: token not in block "
1.2 benno 500: "set: %zu (have %zu blocks)",
1.1 benno 501: p->fname, tok, p->blk.blksz);
502: goto out;
503: }
504: sz = tok == p->blk.blksz - 1 ? p->blk.rem : p->blk.len;
505: assert(sz);
1.4 deraadt 506: assert(p->map != MAP_FAILED);
1.1 benno 507: buf = p->map + (tok * p->blk.len);
508:
509: /*
510: * Now we read from our block.
511: * We should only be at this point if we have a
512: * block to read from, i.e., if we were able to
513: * map our origin file and create a block
514: * profile from it.
515: */
516:
1.4 deraadt 517: assert(p->map != MAP_FAILED);
1.3 deraadt 518: if (!buf_copy(sess, buf, sz, p)) {
1.1 benno 519: ERRX1(sess, "buf_copy");
520: goto out;
521: }
522: p->total += sz;
523: LOG4(sess, "%s: copied %zu B", p->fname, sz);
524: MD4_Update(&p->ctx, buf, sz);
525: return 1;
526: }
527:
1.3 deraadt 528: if (!buf_copy(sess, NULL, 0, p)) {
1.1 benno 529: ERRX1(sess, "buf_copy");
530: goto out;
531: }
532:
1.4 deraadt 533: assert(rawtok == 0);
534: assert(p->obufsz == 0);
1.1 benno 535:
1.2 benno 536: /*
1.1 benno 537: * Make sure our resulting MD4 hashes match.
538: * FIXME: if the MD4 hashes don't match, then our file has
539: * changed out from under us.
540: * This should require us to re-run the sequence in another
541: * phase.
542: */
543:
544: MD4_Final(ourmd, &p->ctx);
545:
1.3 deraadt 546: if (!io_read_buf(sess, p->fdin, md, MD4_DIGEST_LENGTH)) {
1.1 benno 547: ERRX1(sess, "io_read_buf");
548: goto out;
549: } else if (memcmp(md, ourmd, MD4_DIGEST_LENGTH)) {
550: ERRX(sess, "%s: hash does not match", p->fname);
551: goto out;
1.5 benno 552: }
553:
1.9 florian 554: /* Adjust our file metadata (uid, mode, etc.). */
1.6 benno 555:
1.9 florian 556: if (!rsync_set_metadata(sess, 1, p->fd, f, p->fname)) {
557: ERRX1(sess, "rsync_set_metadata");
558: goto out;
1.1 benno 559: }
560:
561: /* Finally, rename the temporary to the real file. */
562:
1.4 deraadt 563: if (renameat(p->rootfd, p->fname, p->rootfd, f->path) == -1) {
1.1 benno 564: ERR(sess, "%s: renameat: %s", p->fname, f->path);
565: goto out;
566: }
567:
568: log_file(sess, p, f);
569: download_cleanup(p, 0);
570: return 1;
571: out:
572: download_cleanup(p, 1);
573: return -1;
574: }