Fix some bugs when testing opensds ansible
[stor4nfv.git] / src / ceph / src / test / librbd / fsx.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:8; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=8 smarttab
3 /*
4  *      Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserverd.
5  *
6  *      File:   fsx.cc
7  *      Author: Avadis Tevanian, Jr.
8  *
9  *      File system exerciser.
10  *
11  *      Rewritten 8/98 by Conrad Minshall.
12  *
13  *      Small changes to work under Linux -- davej.
14  *
15  *      Checks for mmap last-page zero fill.
16  */
17
18 #include <sys/types.h>
19 #include <unistd.h>
20 #include <limits.h>
21 #include <time.h>
22 #include <strings.h>
23 #include <sys/file.h>
24 #include <sys/stat.h>
25 #include <sys/mman.h>
26 #include <linux/fs.h>
27 #include <sys/ioctl.h>
28 #ifdef HAVE_ERR_H
29 #include <err.h>
30 #endif
31 #include <signal.h>
32 #include <stdbool.h>
33 #include <stddef.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <stdarg.h>
38 #include <assert.h>
39 #include <errno.h>
40 #include <math.h>
41 #include <fcntl.h>
42 #include <random>
43
44 #include "include/intarith.h"
45 #include "include/krbd.h"
46 #include "include/rados/librados.h"
47 #include "include/rados/librados.hpp"
48 #include "include/rbd/librbd.h"
49 #include "include/rbd/librbd.hpp"
50 #include "common/Cond.h"
51 #include "common/SubProcess.h"
52 #include "common/safe_io.h"
53 #include "journal/Journaler.h"
54 #include "journal/ReplayEntry.h"
55 #include "journal/ReplayHandler.h"
56 #include "journal/Settings.h"
57
58 #include <boost/scope_exit.hpp>
59
#define NUMPRINTCOLUMNS 32      /* # columns of data to print on each line */

/*
 *      A log entry is an operation and a bunch of arguments.
 */

struct log_entry {
        int     operation;      /* presumably one of the OP_* codes -- confirm against the logging code */
        int     args[3];        /* up to three operation arguments; meaning depends on the operation */
};

#define LOGSIZE 1000            /* number of entries in oplog[] */

struct log_entry        oplog[LOGSIZE]; /* the log */
int                     logptr = 0;     /* current position in log */
int                     logcount = 0;   /* total ops */
76
/*
 * The operation matrix is complex due to conditional execution of different
 * features. Hence when we come to deciding what operation to run, we need to
 * be careful in how we select the different operations. The active operations
 * are mapped to numbers as follows:
 *
 *              lite    !lite
 * READ:        0       0
 * WRITE:       1       1
 * MAPREAD:     2       2
 * MAPWRITE:    3       3
 * TRUNCATE:    -       4
 * FALLOCATE:   -       5
 * PUNCH HOLE:  -       6
 * WRITESAME:   -       7
 * COMPAREANDWRITE:     -       8
 *
 * When mapped read/writes are disabled, they are simply converted to normal
 * reads and writes. When fallocate/fpunch calls are disabled, they are
 * converted to OP_SKIPPED. Hence OP_SKIPPED needs to have a number higher than
 * the operation selection matrix, as does the OP_CLOSEOPEN which is an
 * operation modifier rather than an operation in itself.
 *
 * Because of the "lite" version, we also need to have different "maximum
 * operation" defines to allow the ops to be selected correctly based on the
 * mode being run.
 *
 * NOTE(review): the table above does not list the rbd-specific OP_CLONE (9)
 * and OP_FLATTEN (10) defined below; OP_MAX_FULL (11) is one past them.
 */

/* common operations */
#define OP_READ         0
#define OP_WRITE        1
#define OP_MAPREAD      2
#define OP_MAPWRITE     3
#define OP_MAX_LITE     4       /* one past the last common operation */

/* !lite operations */
#define OP_TRUNCATE     4
#define OP_FALLOCATE    5
#define OP_PUNCH_HOLE   6
#define OP_WRITESAME    7
#define OP_COMPARE_AND_WRITE    8
/* rbd-specific operations */
#define OP_CLONE        9
#define OP_FLATTEN      10
#define OP_MAX_FULL     11      /* one past the last !lite operation */

/* operation modifiers */
#define OP_CLOSEOPEN    100
#define OP_SKIPPED      101
126
/*
 * Override any system-provided PAGE_SIZE/PAGE_MASK so that both are
 * derived from getpagesize() at the point of use.
 */
#undef PAGE_SIZE
#define PAGE_SIZE       getpagesize()
#undef PAGE_MASK
#define PAGE_MASK       (PAGE_SIZE - 1)


char    *original_buf;                  /* a pointer to the original data */
char    *good_buf;                      /* a pointer to the correct data */
char    *temp_buf;                      /* a pointer to the current data */

char    dirpath[1024];

off_t           file_size = 0;
off_t           biggest = 0;
unsigned long   testcalls = 0;          /* calls to function "test" */

/*
 * Run-time options and their defaults; the trailing comment names the
 * command-line flag associated with each one.
 */
unsigned long   simulatedopcount = 0;   /* -b flag */
int     closeprob = 0;                  /* -c flag */
int     debug = 0;                      /* -d flag */
unsigned long   debugstart = 0;         /* -D flag */
int     flush_enabled = 0;              /* -f flag */
int     holebdy = 1;                    /* -h flag */
bool    journal_replay = false;         /* -j flag */
int     keep_on_success = 0;            /* -k flag */
int     do_fsync = 0;                   /* -y flag */
unsigned long   maxfilelen = 256 * 1024;        /* -l flag */
int     sizechecks = 1;                 /* -n flag disables them */
int     maxoplen = 64 * 1024;           /* -o flag */
int     quiet = 0;                      /* -q flag */
unsigned long progressinterval = 0;     /* -p flag */
int     readbdy = 1;                    /* -r flag */
int     style = 0;                      /* -s flag */
int     prealloc = 0;                   /* -x flag */
int     truncbdy = 1;                   /* -t flag */
int     writebdy = 1;                   /* -w flag */
long    monitorstart = -1;              /* -m flag */
long    monitorend = -1;                /* -m flag */
int     lite = 0;                       /* -L flag */
long    numops = -1;                    /* -N flag */
int     randomoplen = 1;                /* -O flag disables it */
int     seed = 1;                       /* -S flag */
int     mapped_writes = 0;              /* -W flag disables */
int     fallocate_calls = 0;            /* -F flag disables */
int     punch_hole_calls = 1;           /* -H flag disables */
int     clone_calls = 1;                /* -C flag disables */
int     randomize_striping = 1;         /* -U flag disables */
int     randomize_parent_overlap = 1;
int     mapped_reads = 0;               /* -R flag disables it */
int     fsxgoodfd = 0;
int     o_direct = 0;                   /* -Z flag */

int num_clones = 0;

int page_size;
int page_mask;
int mmap_mask;

FILE *  fsxlogf = NULL;                 /* when non-NULL, prt() also writes here */
int badoff = -1;
int closeopen = 0;
187
/*
 * Write a warning to stderr in the form
 *
 *      fsx: <formatted message>: <strerror(code)>
 *
 * The "<formatted message>: " part is omitted when fmt is NULL.
 *
 * code - errno-style error number passed to strerror()
 * fmt  - printf-style format string, or NULL
 * ap   - arguments for fmt
 */
void
vwarnc(int code, const char *fmt, va_list ap) {
        fputs("fsx: ", stderr);
        if (fmt) {
                vfprintf(stderr, fmt, ap);
                fputs(": ", stderr);
        }
        fprintf(stderr, "%s\n", strerror(code));
}
197
198 void
199 warn(const char * fmt, ...)  {
200         va_list ap;
201         va_start(ap, fmt);
202         vwarnc(errno, fmt, ap);
203         va_end(ap);
204 }
205
206 #define BUF_SIZE 1024
207
208 void
209 prt(const char *fmt, ...)
210 {
211         va_list args;
212         char buffer[BUF_SIZE];
213
214         va_start(args, fmt);
215         vsnprintf(buffer, BUF_SIZE, fmt, args);
216         va_end(args);
217         fprintf(stdout, "%s", buffer);
218         if (fsxlogf)
219                 fprintf(fsxlogf, "%s", buffer);
220 }
221
222 void
223 prterr(const char *prefix)
224 {
225         prt("%s%s%s\n", prefix, prefix ? ": " : "", strerror(errno));
226 }
227
228 void
229 prterrcode(const char *prefix, int code)
230 {
231         prt("%s%s%s\n", prefix, prefix ? ": " : "", strerror(-code));
232 }
233
/*
 * Report "<msg>: <strerror(-err)>" on stderr.
 *
 * err is a negative errno value; it is negated before the lookup.
 */
void
simple_err(const char *msg, int err)
{
        const char *reason = strerror(-err);

        fprintf(stderr, "%s: %s\n", msg, reason);
}
239
240 /*
241  * random
242  */
/* Process-wide Mersenne Twister engine backing get_random(). */
std::mt19937 random_generator;

/*
 * Return the next value produced by random_generator.
 */
uint_fast32_t
get_random(void)
{
        const uint_fast32_t value = random_generator();

        return value;
}
250
251 void replay_imagename(char *buf, size_t len, int clones);
252
253 namespace {
254
255 static const std::string JOURNAL_CLIENT_ID("fsx");
256
257 struct ReplayHandler : public journal::ReplayHandler {
258         journal::Journaler *journaler;
259         journal::Journaler *replay_journaler;
260         Context *on_finish;
261
262         ReplayHandler(journal::Journaler *journaler,
263                       journal::Journaler *replay_journaler, Context *on_finish)
264                 : journaler(journaler), replay_journaler(replay_journaler),
265                   on_finish(on_finish) {
266         }
267
268         void get() override {
269         }
270         void put() override {
271         }
272
273         void handle_entries_available() override {
274                 while (true) {
275                         journal::ReplayEntry replay_entry;
276                         if (!journaler->try_pop_front(&replay_entry)) {
277                                 return;
278                         }
279
280                         replay_journaler->append(0, replay_entry.get_data());
281                 }
282         }
283
284         void handle_complete(int r) override {
285                 on_finish->complete(r);
286         }
287 };
288
289 int get_image_id(librados::IoCtx &io_ctx, const char *image_name,
290                  std::string *image_id) {
291         librbd::RBD rbd;
292         librbd::Image image;
293         int r = rbd.open(io_ctx, image, image_name);
294         if (r < 0) {
295                 simple_err("failed to open image", r);
296                 return r;
297         }
298
299         rbd_image_info_t info;
300         r = image.stat(info, sizeof(info));
301         if (r < 0) {
302                 simple_err("failed to stat image", r);
303                 return r;
304         }
305
306         *image_id = std::string(&info.block_name_prefix[strlen(RBD_DATA_PREFIX)]);
307         return 0;
308 }
309
310 int register_journal(rados_ioctx_t ioctx, const char *image_name) {
311         librados::IoCtx io_ctx;
312         librados::IoCtx::from_rados_ioctx_t(ioctx, io_ctx);
313
314         std::string image_id;
315         int r = get_image_id(io_ctx, image_name, &image_id);
316         if (r < 0) {
317                 return r;
318         }
319
320         journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, {});
321         r = journaler.register_client(bufferlist());
322         if (r < 0) {
323                 simple_err("failed to register journal client", r);
324                 return r;
325         }
326         return 0;
327 }
328
329 int unregister_journal(rados_ioctx_t ioctx, const char *image_name) {
330         librados::IoCtx io_ctx;
331         librados::IoCtx::from_rados_ioctx_t(ioctx, io_ctx);
332
333         std::string image_id;
334         int r = get_image_id(io_ctx, image_name, &image_id);
335         if (r < 0) {
336                 return r;
337         }
338
339         journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, {});
340         r = journaler.unregister_client();
341         if (r < 0) {
342                 simple_err("failed to unregister journal client", r);
343                 return r;
344         }
345         return 0;
346 }
347
348 int create_replay_image(rados_ioctx_t ioctx, int order,
349                         uint64_t stripe_unit, int stripe_count,
350                         const char *replay_image_name,
351                         const char *last_replay_image_name) {
352         librados::IoCtx io_ctx;
353         librados::IoCtx::from_rados_ioctx_t(ioctx, io_ctx);
354
355         int r;
356         librbd::RBD rbd;
357         if (last_replay_image_name == nullptr) {
358                 r = rbd.create2(io_ctx, replay_image_name, 0,
359                                 RBD_FEATURES_ALL, &order);
360         } else {
361                 r = rbd.clone2(io_ctx, last_replay_image_name, "snap",
362                                io_ctx, replay_image_name, RBD_FEATURES_ALL,
363                                &order, stripe_unit, stripe_count);
364         }
365
366         if (r < 0) {
367                 simple_err("failed to create replay image", r);
368                 return r;
369         }
370
371         return 0;
372 }
373
374 int replay_journal(rados_ioctx_t ioctx, const char *image_name,
375                    const char *replay_image_name) {
376         librados::IoCtx io_ctx;
377         librados::IoCtx::from_rados_ioctx_t(ioctx, io_ctx);
378
379         std::string image_id;
380         int r = get_image_id(io_ctx, image_name, &image_id);
381         if (r < 0) {
382                 return r;
383         }
384
385         std::string replay_image_id;
386         r = get_image_id(io_ctx, replay_image_name, &replay_image_id);
387         if (r < 0) {
388                 return r;
389         }
390
391         journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, {});
392         C_SaferCond init_ctx;
393         journaler.init(&init_ctx);
394         BOOST_SCOPE_EXIT_ALL( (&journaler) ) {
395                 journaler.shut_down();
396         };
397
398         r = init_ctx.wait();
399         if (r < 0) {
400                 simple_err("failed to initialize journal", r);
401                 return r;
402         }
403
404         journal::Journaler replay_journaler(io_ctx, replay_image_id, "", {});
405
406         C_SaferCond replay_init_ctx;
407         replay_journaler.init(&replay_init_ctx);
408         BOOST_SCOPE_EXIT_ALL( (&replay_journaler) ) {
409                 replay_journaler.shut_down();
410         };
411
412         r = replay_init_ctx.wait();
413         if (r < 0) {
414                 simple_err("failed to initialize replay journal", r);
415                 return r;
416         }
417
418         replay_journaler.start_append(0, 0, 0);
419
420         C_SaferCond replay_ctx;
421         ReplayHandler replay_handler(&journaler, &replay_journaler,
422                                      &replay_ctx);
423
424         // copy journal events from source image to replay image
425         journaler.start_replay(&replay_handler);
426         r = replay_ctx.wait();
427
428         journaler.stop_replay();
429
430         C_SaferCond stop_ctx;
431         replay_journaler.stop_append(&stop_ctx);
432         int stop_r = stop_ctx.wait();
433         if (r == 0 && stop_r < 0) {
434                 r = stop_r;
435         }
436
437         if (r < 0) {
438                 simple_err("failed to replay journal", r);
439                 return r;
440         }
441
442         librbd::RBD rbd;
443         librbd::Image image;
444         r = rbd.open(io_ctx, image, replay_image_name);
445         if (r < 0) {
446                 simple_err("failed to open replay image", r);
447                 return r;
448         }
449
450         // perform an IO op to initiate the journal replay
451         bufferlist bl;
452         r = static_cast<ssize_t>(image.write(0, 0, bl));
453         if (r < 0) {
454                 simple_err("failed to write to replay image", r);
455                 return r;
456         }
457         return 0;
458 }
459
460 int finalize_journal(rados_ioctx_t ioctx, const char *imagename, int clones,
461                      int order, uint64_t stripe_unit, int stripe_count) {
462         char replayimagename[1024];
463         replay_imagename(replayimagename, sizeof(replayimagename), clones);
464
465         char lastreplayimagename[1024];
466         if (clones > 0) {
467                 replay_imagename(lastreplayimagename,
468                                  sizeof(lastreplayimagename), clones - 1);
469         }
470
471         int ret = create_replay_image(ioctx, order, stripe_unit,
472                                       stripe_count, replayimagename,
473                                       clones > 0 ? lastreplayimagename :
474                                                    nullptr);
475         if (ret < 0) {
476                 exit(EXIT_FAILURE);
477         }
478
479         ret = replay_journal(ioctx, imagename, replayimagename);
480         if (ret < 0) {
481                 exit(EXIT_FAILURE);
482         }
483         return 0;
484 }
485
486 } // anonymous namespace
487
488 /*
489  * rbd
490  */
491
/*
 * Per-image state shared by the rbd_operations handlers.
 */
struct rbd_ctx {
        const char *name;       /* image name */
        rbd_image_t image;      /* image handle */
        const char *krbd_name;  /* image /dev/rbd<id> name */ /* reused for nbd test */
        int krbd_fd;            /* image /dev/rbd<id> fd */ /* reused for nbd test */
};

/* An unopened context: no name, no image handle, no device name, fd -1. */
#define RBD_CTX_INIT    (struct rbd_ctx) { NULL, NULL, NULL, -1}

/*
 * Table of function pointers through which an image is accessed; each
 * backend supplies its own instance.
 */
struct rbd_operations {
        int (*open)(const char *name, struct rbd_ctx *ctx);
        int (*close)(struct rbd_ctx *ctx);
        ssize_t (*read)(struct rbd_ctx *ctx, uint64_t off, size_t len, char *buf);
        ssize_t (*write)(struct rbd_ctx *ctx, uint64_t off, size_t len, const char *buf);
        int (*flush)(struct rbd_ctx *ctx);
        int (*discard)(struct rbd_ctx *ctx, uint64_t off, uint64_t len);
        int (*get_size)(struct rbd_ctx *ctx, uint64_t *size);
        int (*resize)(struct rbd_ctx *ctx, uint64_t size);
        int (*clone)(struct rbd_ctx *ctx, const char *src_snapname,
                     const char *dst_imagename, int *order, int stripe_unit,
                     int stripe_count);
        int (*flatten)(struct rbd_ctx *ctx);
        ssize_t (*writesame)(struct rbd_ctx *ctx, uint64_t off, size_t len,
                             const char *buf, size_t data_len);
        ssize_t (*compare_and_write)(struct rbd_ctx *ctx, uint64_t off, size_t len,
                                     const char *cmp_buf, const char *buf);
};

char *pool;                     /* name of the pool our test image is in */
char *iname;                    /* name of our test image */
rados_t cluster;                /* handle for our test cluster */
rados_ioctx_t ioctx;            /* handle for our test pool */
struct krbd_ctx *krbd;          /* handle for libkrbd */
bool skip_partial_discard;      /* rbd_skip_partial_discard config value*/
526
527 /*
528  * librbd/krbd rbd_operations handlers.  Given the rest of fsx.c, no
529  * attempt to do error handling is made in these handlers.
530  */
531
532 int
533 __librbd_open(const char *name, struct rbd_ctx *ctx)
534 {
535         rbd_image_t image;
536         int ret;
537
538         assert(!ctx->name && !ctx->image &&
539                !ctx->krbd_name && ctx->krbd_fd < 0);
540
541         ret = rbd_open(ioctx, name, &image, NULL);
542         if (ret < 0) {
543                 prt("rbd_open(%s) failed\n", name);
544                 return ret;
545         }
546
547         ctx->name = strdup(name);
548         ctx->image = image;
549         ctx->krbd_name = NULL;
550         ctx->krbd_fd = -1;
551
552         return 0;
553 }
554
555 int
556 librbd_open(const char *name, struct rbd_ctx *ctx)
557 {
558         return __librbd_open(name, ctx);
559 }
560
561 int
562 __librbd_close(struct rbd_ctx *ctx)
563 {
564         int ret;
565
566         assert(ctx->name && ctx->image);
567
568         ret = rbd_close(ctx->image);
569         if (ret < 0) {
570                 prt("rbd_close(%s) failed\n", ctx->name);
571                 return ret;
572         }
573
574         free((void *)ctx->name);
575
576         ctx->name = NULL;
577         ctx->image = NULL;
578
579         return 0;
580 }
581
582 int
583 librbd_close(struct rbd_ctx *ctx)
584 {
585         return __librbd_close(ctx);
586 }
587
588 int
589 librbd_verify_object_map(struct rbd_ctx *ctx)
590 {
591         int n;
592         uint64_t flags;
593         n = rbd_get_flags(ctx->image, &flags);
594         if (n < 0) {
595                 prt("rbd_get_flags() failed\n");
596                 return n;
597         }
598
599         if ((flags & RBD_FLAG_OBJECT_MAP_INVALID) != 0) {
600                 prt("rbd_get_flags() indicates object map is invalid\n");
601                 return -EINVAL;
602         }
603         return 0;
604 }
605
606 ssize_t
607 librbd_read(struct rbd_ctx *ctx, uint64_t off, size_t len, char *buf)
608 {
609         ssize_t n;
610
611         n = rbd_read(ctx->image, off, len, buf);
612         if (n < 0)
613                 prt("rbd_read(%llu, %zu) failed\n", off, len);
614
615         return n;
616 }
617
618 ssize_t
619 librbd_write(struct rbd_ctx *ctx, uint64_t off, size_t len, const char *buf)
620 {
621         ssize_t n;
622         int ret;
623
624         n = rbd_write(ctx->image, off, len, buf);
625         if (n < 0) {
626                 prt("rbd_write(%llu, %zu) failed\n", off, len);
627                 return n;
628         }
629
630         ret = librbd_verify_object_map(ctx);
631         if (ret < 0) {
632                 return ret;
633         }
634         return n;
635 }
636
637 int
638 librbd_flush(struct rbd_ctx *ctx)
639 {
640         int ret;
641
642         ret = rbd_flush(ctx->image);
643         if (ret < 0) {
644                 prt("rbd_flush failed\n");
645                 return ret;
646         }
647
648         return librbd_verify_object_map(ctx);
649 }
650
651 int
652 librbd_discard(struct rbd_ctx *ctx, uint64_t off, uint64_t len)
653 {
654         int ret;
655
656         ret = rbd_discard(ctx->image, off, len);
657         if (ret < 0) {
658                 prt("rbd_discard(%llu, %llu) failed\n", off, len);
659                 return ret;
660         }
661
662         return librbd_verify_object_map(ctx);
663 }
664
665 ssize_t
666 librbd_writesame(struct rbd_ctx *ctx, uint64_t off, size_t len,
667                  const char *buf, size_t data_len)
668 {
669         ssize_t n;
670         int ret;
671
672         n = rbd_writesame(ctx->image, off, len, buf, data_len, 0);
673         if (n < 0) {
674                 prt("rbd_writesame(%llu, %zu) failed\n", off, len);
675                 return n;
676         }
677
678         ret = librbd_verify_object_map(ctx);
679         if (ret < 0) {
680                 return ret;
681         }
682         return n;
683 }
684
685 ssize_t
686 librbd_compare_and_write(struct rbd_ctx *ctx, uint64_t off, size_t len,
687                          const char *cmp_buf, const char *buf)
688 {
689         ssize_t n;
690         int ret;
691         uint64_t mismatch_off = 0;
692
693         n = rbd_compare_and_write(ctx->image, off, len, cmp_buf, buf, &mismatch_off, 0);
694         if (n == -EINVAL) {
695                 return n;
696         } else if (n < 0) {
697                 prt("rbd_compare_and_write mismatch(%llu, %zu, %llu) failed\n",
698                     off, len, mismatch_off);
699                 return n;
700         }
701
702         ret = librbd_verify_object_map(ctx);
703         if (ret < 0) {
704                 return ret;
705         }
706         return n;
707
708 }
709
710 int
711 librbd_get_size(struct rbd_ctx *ctx, uint64_t *size)
712 {
713         rbd_image_info_t info;
714         int ret;
715
716         ret = rbd_stat(ctx->image, &info, sizeof(info));
717         if (ret < 0) {
718                 prt("rbd_stat failed\n");
719                 return ret;
720         }
721
722         *size = info.size;
723
724         return 0;
725 }
726
727 int
728 __librbd_resize(struct rbd_ctx *ctx, uint64_t size)
729 {
730         int ret;
731
732         ret = rbd_resize(ctx->image, size);
733         if (ret < 0) {
734                 prt("rbd_resize(%llu) failed\n", size);
735                 return ret;
736         }
737
738         return librbd_verify_object_map(ctx);
739 }
740
741 int
742 librbd_resize(struct rbd_ctx *ctx, uint64_t size)
743 {
744         return __librbd_resize(ctx, size);
745 }
746
747 int
748 __librbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
749                const char *dst_imagename, int *order, int stripe_unit,
750                int stripe_count, bool krbd)
751 {
752         int ret;
753
754         ret = rbd_snap_create(ctx->image, src_snapname);
755         if (ret < 0) {
756                 prt("rbd_snap_create(%s@%s) failed\n", ctx->name,
757                     src_snapname);
758                 return ret;
759         }
760
761         ret = rbd_snap_protect(ctx->image, src_snapname);
762         if (ret < 0) {
763                 prt("rbd_snap_protect(%s@%s) failed\n", ctx->name,
764                     src_snapname);
765                 return ret;
766         }
767
768         uint64_t features = RBD_FEATURES_ALL;
769         if (krbd) {
770                 features &= ~(RBD_FEATURE_OBJECT_MAP     |
771                               RBD_FEATURE_FAST_DIFF      |
772                               RBD_FEATURE_DEEP_FLATTEN   |
773                               RBD_FEATURE_JOURNALING);
774         }
775         ret = rbd_clone2(ioctx, ctx->name, src_snapname, ioctx,
776                          dst_imagename, features, order,
777                          stripe_unit, stripe_count);
778         if (ret < 0) {
779                 prt("rbd_clone2(%s@%s -> %s) failed\n", ctx->name,
780                     src_snapname, dst_imagename);
781                 return ret;
782         }
783
784         return 0;
785 }
786
787 int
788 librbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
789              const char *dst_imagename, int *order, int stripe_unit,
790              int stripe_count)
791 {
792         return __librbd_clone(ctx, src_snapname, dst_imagename, order,
793                               stripe_unit, stripe_count, false);
794 }
795
796 int
797 __librbd_flatten(struct rbd_ctx *ctx)
798 {
799         int ret;
800
801         ret = rbd_flatten(ctx->image);
802         if (ret < 0) {
803                 prt("rbd_flatten failed\n");
804                 return ret;
805         }
806
807         return librbd_verify_object_map(ctx);
808 }
809
810 int
811 librbd_flatten(struct rbd_ctx *ctx)
812 {
813         return __librbd_flatten(ctx);
814 }
815
/*
 * librbd backend.  Entries are positional and must stay in the order of
 * the members of struct rbd_operations.
 */
const struct rbd_operations librbd_operations = {
        librbd_open,                    /* open */
        librbd_close,                   /* close */
        librbd_read,                    /* read */
        librbd_write,                   /* write */
        librbd_flush,                   /* flush */
        librbd_discard,                 /* discard */
        librbd_get_size,                /* get_size */
        librbd_resize,                  /* resize */
        librbd_clone,                   /* clone */
        librbd_flatten,                 /* flatten */
        librbd_writesame,               /* writesame */
        librbd_compare_and_write,       /* compare_and_write */
};
830
831 int
832 krbd_open(const char *name, struct rbd_ctx *ctx)
833 {
834         char *devnode;
835         int fd;
836         int ret;
837
838         ret = __librbd_open(name, ctx);
839         if (ret < 0)
840                 return ret;
841
842         ret = krbd_map(krbd, pool, name, "", "", &devnode);
843         if (ret < 0) {
844                 prt("krbd_map(%s) failed\n", name);
845                 return ret;
846         }
847
848         fd = open(devnode, O_RDWR | o_direct);
849         if (fd < 0) {
850                 ret = -errno;
851                 prt("open(%s) failed\n", devnode);
852                 return ret;
853         }
854
855         ctx->krbd_name = devnode;
856         ctx->krbd_fd = fd;
857
858         return 0;
859 }
860
861 int
862 krbd_close(struct rbd_ctx *ctx)
863 {
864         int ret;
865
866         assert(ctx->krbd_name && ctx->krbd_fd >= 0);
867
868         if (close(ctx->krbd_fd) < 0) {
869                 ret = -errno;
870                 prt("close(%s) failed\n", ctx->krbd_name);
871                 return ret;
872         }
873
874         ret = krbd_unmap(krbd, ctx->krbd_name, "");
875         if (ret < 0) {
876                 prt("krbd_unmap(%s) failed\n", ctx->krbd_name);
877                 return ret;
878         }
879
880         free((void *)ctx->krbd_name);
881
882         ctx->krbd_name = NULL;
883         ctx->krbd_fd = -1;
884
885         return __librbd_close(ctx);
886 }
887
888 ssize_t
889 krbd_read(struct rbd_ctx *ctx, uint64_t off, size_t len, char *buf)
890 {
891         ssize_t n;
892
893         n = pread(ctx->krbd_fd, buf, len, off);
894         if (n < 0) {
895                 n = -errno;
896                 prt("pread(%llu, %zu) failed\n", off, len);
897                 return n;
898         }
899
900         return n;
901 }
902
903 ssize_t
904 krbd_write(struct rbd_ctx *ctx, uint64_t off, size_t len, const char *buf)
905 {
906         ssize_t n;
907
908         n = pwrite(ctx->krbd_fd, buf, len, off);
909         if (n < 0) {
910                 n = -errno;
911                 prt("pwrite(%llu, %zu) failed\n", off, len);
912                 return n;
913         }
914
915         return n;
916 }
917
918 int
919 __krbd_flush(struct rbd_ctx *ctx, bool invalidate)
920 {
921         int ret;
922
923         if (o_direct)
924                 return 0;
925
926         /*
927          * BLKFLSBUF will sync the filesystem on top of the device (we
928          * don't care about that here, since we write directly to it),
929          * write out any dirty buffers and invalidate the buffer cache.
930          * It won't do a hardware cache flush.
931          *
932          * fsync() will write out any dirty buffers and do a hardware
933          * cache flush (which we don't care about either, because for
934          * krbd it's a noop).  It won't try to empty the buffer cache
935          * nor poke the filesystem before writing out.
936          *
937          * Given that, for our purposes, fsync is a flush, while
938          * BLKFLSBUF is a flush+invalidate.
939          */
940         if (invalidate)
941                 ret = ioctl(ctx->krbd_fd, BLKFLSBUF, NULL);
942         else
943                 ret = fsync(ctx->krbd_fd);
944         if (ret < 0) {
945                 ret = -errno;
946                 prt("%s failed\n", invalidate ? "BLKFLSBUF" : "fsync");
947                 return ret;
948         }
949
950         return 0;
951 }
952
953 int
954 krbd_flush(struct rbd_ctx *ctx)
955 {
956         return __krbd_flush(ctx, false);
957 }
958
959 int
960 krbd_discard(struct rbd_ctx *ctx, uint64_t off, uint64_t len)
961 {
962         uint64_t range[2] = { off, len };
963         int ret;
964
965         /*
966          * BLKDISCARD goes straight to disk and doesn't do anything
967          * about dirty buffers.  This means we need to flush so that
968          *
969          *   write 0..3M
970          *   discard 1..2M
971          *
972          * results in "data 0000 data" rather than "data data data" on
973          * disk and invalidate so that
974          *
975          *   discard 1..2M
976          *   read 0..3M
977          *
978          * returns "data 0000 data" rather than "data data data" in
979          * case 1..2M was cached.
980          */
981         ret = __krbd_flush(ctx, true);
982         if (ret < 0)
983                 return ret;
984
985         /*
986          * off and len must be 512-byte aligned, otherwise BLKDISCARD
987          * will fail with -EINVAL.  This means that -K (enable krbd
988          * mode) requires -h 512 or similar.
989          */
990         if (ioctl(ctx->krbd_fd, BLKDISCARD, &range) < 0) {
991                 ret = -errno;
992                 prt("BLKDISCARD(%llu, %llu) failed\n", off, len);
993                 return ret;
994         }
995
996         return 0;
997 }
998
999 int
1000 krbd_get_size(struct rbd_ctx *ctx, uint64_t *size)
1001 {
1002         uint64_t bytes;
1003
1004         if (ioctl(ctx->krbd_fd, BLKGETSIZE64, &bytes) < 0) {
1005                 int ret = -errno;
1006                 prt("BLKGETSIZE64 failed\n");
1007                 return ret;
1008         }
1009
1010         *size = bytes;
1011
1012         return 0;
1013 }
1014
1015 int
1016 krbd_resize(struct rbd_ctx *ctx, uint64_t size)
1017 {
1018         int ret;
1019
1020         assert(size % truncbdy == 0);
1021
1022         /*
1023          * When krbd detects a size change, it calls revalidate_disk(),
1024          * which ends up calling invalidate_bdev(), which invalidates
1025          * clean pages and does nothing about dirty pages beyond the
1026          * new size.  The preceding cache flush makes sure those pages
1027          * are invalidated, which is what we need on shrink so that
1028          *
1029          *  write 0..1M
1030          *  resize 0
1031          *  resize 2M
1032          *  read 0..2M
1033          *
1034          * returns "0000 0000" rather than "data 0000".
1035          */
1036         ret = __krbd_flush(ctx, false);
1037         if (ret < 0)
1038                 return ret;
1039
1040         return __librbd_resize(ctx, size);
1041 }
1042
1043 int
1044 krbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
1045            const char *dst_imagename, int *order, int stripe_unit,
1046            int stripe_count)
1047 {
1048         int ret;
1049
1050         ret = __krbd_flush(ctx, false);
1051         if (ret < 0)
1052                 return ret;
1053
1054         return __librbd_clone(ctx, src_snapname, dst_imagename, order,
1055                               stripe_unit, stripe_count, true);
1056 }
1057
1058 int
1059 krbd_flatten(struct rbd_ctx *ctx)
1060 {
1061         int ret;
1062
1063         ret = __krbd_flush(ctx, false);
1064         if (ret < 0)
1065                 return ret;
1066
1067         return __librbd_flatten(ctx);
1068 }
1069
1070 const struct rbd_operations krbd_operations = {
1071         krbd_open,
1072         krbd_close,
1073         krbd_read,
1074         krbd_write,
1075         krbd_flush,
1076         krbd_discard,
1077         krbd_get_size,
1078         krbd_resize,
1079         krbd_clone,
1080         krbd_flatten,
1081         NULL,
1082 };
1083
1084 int
1085 nbd_open(const char *name, struct rbd_ctx *ctx)
1086 {
1087         int r;
1088         int fd;
1089         char dev[4096];
1090         char *devnode;
1091
1092         SubProcess process("rbd-nbd", SubProcess::KEEP, SubProcess::PIPE,
1093                            SubProcess::KEEP);
1094         process.add_cmd_arg("map");
1095         std::string img;
1096         img.append(pool);
1097         img.append("/");
1098         img.append(name);
1099         process.add_cmd_arg(img.c_str());
1100
1101         r = __librbd_open(name, ctx);
1102         if (r < 0)
1103                 return r;
1104
1105         r = process.spawn();
1106         if (r < 0) {
1107                 prt("nbd_open failed to run rbd-nbd error: %s\n", process.err().c_str());
1108                 return r;
1109         }
1110         r = safe_read(process.get_stdout(), dev, sizeof(dev));
1111         if (r < 0) {
1112                 prt("nbd_open failed to get nbd device path\n");
1113                 return r;
1114         }
1115         for (int i = 0; i < r; ++i)
1116           if (dev[i] == 10 || dev[i] == 13)
1117             dev[i] = 0;
1118         dev[r] = 0;
1119         r = process.join();
1120         if (r) {
1121                 prt("rbd-nbd failed with error: %s", process.err().c_str());
1122                 return -EINVAL;
1123         }
1124
1125         devnode = strdup(dev);
1126         if (!devnode)
1127                 return -ENOMEM;
1128
1129         fd = open(devnode, O_RDWR | o_direct);
1130         if (fd < 0) {
1131                 r = -errno;
1132                 prt("open(%s) failed\n", devnode);
1133                 return r;
1134         }
1135
1136         ctx->krbd_name = devnode;
1137         ctx->krbd_fd = fd;
1138
1139         return 0;
1140 }
1141
1142 int
1143 nbd_close(struct rbd_ctx *ctx)
1144 {
1145         int r;
1146
1147         assert(ctx->krbd_name && ctx->krbd_fd >= 0);
1148
1149         if (close(ctx->krbd_fd) < 0) {
1150                 r = -errno;
1151                 prt("close(%s) failed\n", ctx->krbd_name);
1152                 return r;
1153         }
1154
1155         SubProcess process("rbd-nbd");
1156         process.add_cmd_arg("unmap");
1157         process.add_cmd_arg(ctx->krbd_name);
1158
1159         r = process.spawn();
1160         if (r < 0) {
1161                 prt("nbd_close failed to run rbd-nbd error: %s\n", process.err().c_str());
1162                 return r;
1163         }
1164         r = process.join();
1165         if (r) {
1166                 prt("rbd-nbd failed with error: %d", process.err().c_str());
1167                 return -EINVAL;
1168         }
1169
1170         free((void *)ctx->krbd_name);
1171
1172         ctx->krbd_name = NULL;
1173         ctx->krbd_fd = -1;
1174
1175         return __librbd_close(ctx);
1176 }
1177
1178 int
1179 nbd_clone(struct rbd_ctx *ctx, const char *src_snapname,
1180           const char *dst_imagename, int *order, int stripe_unit,
1181           int stripe_count)
1182 {
1183         int ret;
1184
1185         ret = __krbd_flush(ctx, false);
1186         if (ret < 0)
1187                 return ret;
1188
1189         return __librbd_clone(ctx, src_snapname, dst_imagename, order,
1190                               stripe_unit, stripe_count, false);
1191 }
1192
1193 const struct rbd_operations nbd_operations = {
1194         nbd_open,
1195         nbd_close,
1196         krbd_read,
1197         krbd_write,
1198         krbd_flush,
1199         krbd_discard,
1200         krbd_get_size,
1201         krbd_resize,
1202         nbd_clone,
1203         krbd_flatten,
1204         NULL,
1205 };
1206
1207 struct rbd_ctx ctx = RBD_CTX_INIT;
1208 const struct rbd_operations *ops = &librbd_operations;
1209
1210 static bool rbd_image_has_parent(struct rbd_ctx *ctx)
1211 {
1212         int ret;
1213
1214         ret = rbd_get_parent_info(ctx->image, NULL, 0, NULL, 0, NULL, 0);
1215         if (ret < 0 && ret != -ENOENT) {
1216                 prterrcode("rbd_get_parent_info", ret);
1217                 exit(1);
1218         }
1219
1220         return !ret;
1221 }
1222
1223 /*
1224  * fsx
1225  */
1226
1227 void
1228 log4(int operation, int arg0, int arg1, int arg2)
1229 {
1230         struct log_entry *le;
1231
1232         le = &oplog[logptr];
1233         le->operation = operation;
1234         if (closeopen)
1235                 le->operation = ~ le->operation;
1236         le->args[0] = arg0;
1237         le->args[1] = arg1;
1238         le->args[2] = arg2;
1239         logptr++;
1240         logcount++;
1241         if (logptr >= LOGSIZE)
1242                 logptr = 0;
1243 }
1244
1245 void
1246 logdump(void)
1247 {
1248         int     i, count, down;
1249         struct log_entry        *lp;
1250         const char *falloc_type[3] = {"PAST_EOF", "EXTENDING", "INTERIOR"};
1251
1252         prt("LOG DUMP (%d total operations):\n", logcount);
1253         if (logcount < LOGSIZE) {
1254                 i = 0;
1255                 count = logcount;
1256         } else {
1257                 i = logptr;
1258                 count = LOGSIZE;
1259         }
1260         for ( ; count > 0; count--) {
1261                 int opnum;
1262
1263                 opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE;
1264                 prt("%d(%3d mod 256): ", opnum, opnum%256);
1265                 lp = &oplog[i];
1266                 if ((closeopen = lp->operation < 0))
1267                         lp->operation = ~ lp->operation;
1268                         
1269                 switch (lp->operation) {
1270                 case OP_MAPREAD:
1271                         prt("MAPREAD  0x%x thru 0x%x\t(0x%x bytes)",
1272                             lp->args[0], lp->args[0] + lp->args[1] - 1,
1273                             lp->args[1]);
1274                         if (badoff >= lp->args[0] && badoff <
1275                                                      lp->args[0] + lp->args[1])
1276                                 prt("\t***RRRR***");
1277                         break;
1278                 case OP_MAPWRITE:
1279                         prt("MAPWRITE 0x%x thru 0x%x\t(0x%x bytes)",
1280                             lp->args[0], lp->args[0] + lp->args[1] - 1,
1281                             lp->args[1]);
1282                         if (badoff >= lp->args[0] && badoff <
1283                                                      lp->args[0] + lp->args[1])
1284                                 prt("\t******WWWW");
1285                         break;
1286                 case OP_READ:
1287                         prt("READ     0x%x thru 0x%x\t(0x%x bytes)",
1288                             lp->args[0], lp->args[0] + lp->args[1] - 1,
1289                             lp->args[1]);
1290                         if (badoff >= lp->args[0] &&
1291                             badoff < lp->args[0] + lp->args[1])
1292                                 prt("\t***RRRR***");
1293                         break;
1294                 case OP_WRITE:
1295                         prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
1296                             lp->args[0], lp->args[0] + lp->args[1] - 1,
1297                             lp->args[1]);
1298                         if (lp->args[0] > lp->args[2])
1299                                 prt(" HOLE");
1300                         else if (lp->args[0] + lp->args[1] > lp->args[2])
1301                                 prt(" EXTEND");
1302                         if ((badoff >= lp->args[0] || badoff >=lp->args[2]) &&
1303                             badoff < lp->args[0] + lp->args[1])
1304                                 prt("\t***WWWW");
1305                         break;
1306                 case OP_TRUNCATE:
1307                         down = lp->args[0] < lp->args[1];
1308                         prt("TRUNCATE %s\tfrom 0x%x to 0x%x",
1309                             down ? "DOWN" : "UP", lp->args[1], lp->args[0]);
1310                         if (badoff >= lp->args[!down] &&
1311                             badoff < lp->args[!!down])
1312                                 prt("\t******WWWW");
1313                         break;
1314                 case OP_FALLOCATE:
1315                         /* 0: offset 1: length 2: where alloced */
1316                         prt("FALLOC   0x%x thru 0x%x\t(0x%x bytes) %s",
1317                                 lp->args[0], lp->args[0] + lp->args[1],
1318                                 lp->args[1], falloc_type[lp->args[2]]);
1319                         if (badoff >= lp->args[0] &&
1320                             badoff < lp->args[0] + lp->args[1])
1321                                 prt("\t******FFFF");
1322                         break;
1323                 case OP_PUNCH_HOLE:
1324                         prt("PUNCH    0x%x thru 0x%x\t(0x%x bytes)",
1325                             lp->args[0], lp->args[0] + lp->args[1] - 1,
1326                             lp->args[1]);
1327                         if (badoff >= lp->args[0] && badoff <
1328                                                      lp->args[0] + lp->args[1])
1329                                 prt("\t******PPPP");
1330                         break;
1331                 case OP_WRITESAME:
1332                         prt("WRITESAME    0x%x thru 0x%x\t(0x%x bytes) data_size 0x%x",
1333                             lp->args[0], lp->args[0] + lp->args[1] - 1,
1334                             lp->args[1], lp->args[2]);
1335                         if (badoff >= lp->args[0] &&
1336                                 badoff < lp->args[0] + lp->args[1])
1337                                 prt("\t***WSWSWSWS");
1338                         break;
1339                 case OP_COMPARE_AND_WRITE:
1340                         prt("COMPARE_AND_WRITE    0x%x thru 0x%x\t(0x%x bytes)",
1341                             lp->args[0], lp->args[0] + lp->args[1] - 1,
1342                             lp->args[1]);
1343                         if (lp->args[0] > lp->args[2])
1344                             prt(" HOLE");
1345                         else if (lp->args[0] + lp->args[1] > lp->args[2])
1346                             prt(" EXTEND");
1347                         if ((badoff >= lp->args[0] || badoff >=lp->args[2]) &&
1348                                 badoff < lp->args[0] + lp->args[1])
1349                                 prt("\t***WWWW");
1350                         break;
1351                 case OP_CLONE:
1352                         prt("CLONE");
1353                         break;
1354                 case OP_FLATTEN:
1355                         prt("FLATTEN");
1356                         break;
1357                 case OP_SKIPPED:
1358                         prt("SKIPPED (no operation)");
1359                         break;
1360                 default:
1361                         prt("BOGUS LOG ENTRY (operation code = %d)!",
1362                             lp->operation);
1363                 }
1364                 if (closeopen)
1365                         prt("\n\t\tCLOSE/OPEN");
1366                 prt("\n");
1367                 i++;
1368                 if (i == LOGSIZE)
1369                         i = 0;
1370         }
1371 }
1372
1373 void
1374 save_buffer(char *buffer, off_t bufferlength, int fd)
1375 {
1376         off_t ret;
1377         ssize_t byteswritten;
1378
1379         if (fd <= 0 || bufferlength == 0)
1380                 return;
1381
1382         if (bufferlength > SSIZE_MAX) {
1383                 prt("fsx flaw: overflow in save_buffer\n");
1384                 exit(67);
1385         }
1386
1387         ret = lseek(fd, (off_t)0, SEEK_SET);
1388         if (ret == (off_t)-1)
1389                 prterr("save_buffer: lseek 0");
1390         
1391         byteswritten = write(fd, buffer, (size_t)bufferlength);
1392         if (byteswritten != bufferlength) {
1393                 if (byteswritten == -1)
1394                         prterr("save_buffer write");
1395                 else
1396                         warn("save_buffer: short write, 0x%x bytes instead of 0x%llx\n",
1397                              (unsigned)byteswritten,
1398                              (unsigned long long)bufferlength);
1399         }
1400 }
1401
1402
1403 void
1404 report_failure(int status)
1405 {
1406         logdump();
1407         
1408         if (fsxgoodfd) {
1409                 if (good_buf) {
1410                         save_buffer(good_buf, file_size, fsxgoodfd);
1411                         prt("Correct content saved for comparison\n");
1412                         prt("(maybe hexdump \"%s\" vs \"%s.fsxgood\")\n",
1413                             iname, iname);
1414                 }
1415                 close(fsxgoodfd);
1416         }
1417         sleep(3);   // so the log can flush to disk.  KLUDGEY!
1418         exit(status);
1419 }
1420
/*
 * Combine the two bytes at cp into a 16-bit value: the byte at cp
 * becomes the high half, the byte at cp + 1 the low half.
 */
#define short_at(cp) \
        ((unsigned short)((((const unsigned char *)(cp))[0] << 8) | \
                          ((const unsigned char *)(cp))[1]))
1423
1424 int
1425 fsxcmp(char *good_buf, char *temp_buf, unsigned size)
1426 {
1427         if (!skip_partial_discard) {
1428                 return memcmp(good_buf, temp_buf, size);
1429         }
1430
1431         for (unsigned i = 0; i < size; i++) {
1432                 if (good_buf[i] != temp_buf[i] && good_buf[i] != 0) {
1433                         return good_buf[i] - temp_buf[i];
1434                 }
1435         }
1436         return 0;
1437 }
1438
1439 void
1440 check_buffers(char *good_buf, char *temp_buf, unsigned offset, unsigned size)
1441 {
1442         if (fsxcmp(good_buf + offset, temp_buf, size) != 0) {
1443                 unsigned i = 0;
1444                 unsigned n = 0;
1445
1446                 prt("READ BAD DATA: offset = 0x%x, size = 0x%x, fname = %s\n",
1447                     offset, size, iname);
1448                 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
1449                 while (size > 0) {
1450                         unsigned char c = good_buf[offset];
1451                         unsigned char t = temp_buf[i];
1452                         if (c != t) {
1453                                 if (n < 16) {
1454                                         unsigned bad = short_at(&temp_buf[i]);
1455                                         prt("0x%5x\t0x%04x\t0x%04x", offset,
1456                                             short_at(&good_buf[offset]), bad);
1457                                         unsigned op = temp_buf[(offset & 1) ? i+1 : i];
1458                                         prt("\t0x%5x\n", n);
1459                                         if (op)
1460                                                 prt("operation# (mod 256) for "
1461                                                   "the bad data may be %u\n",
1462                                                 ((unsigned)op & 0xff));
1463                                         else
1464                                                 prt("operation# (mod 256) for "
1465                                                   "the bad data unknown, check"
1466                                                   " HOLE and EXTEND ops\n");
1467                                 }
1468                                 n++;
1469                                 badoff = offset;
1470                         }
1471                         offset++;
1472                         i++;
1473                         size--;
1474                 }
1475                 report_failure(110);
1476         }
1477 }
1478
1479
1480 void
1481 check_size(void)
1482 {
1483         uint64_t size;
1484         int ret;
1485
1486         ret = ops->get_size(&ctx, &size);
1487         if (ret < 0)
1488                 prterrcode("check_size: ops->get_size", ret);
1489
1490         if ((uint64_t)file_size != size) {
1491                 prt("Size error: expected 0x%llx stat 0x%llx\n",
1492                     (unsigned long long)file_size,
1493                     (unsigned long long)size);
1494                 report_failure(120);
1495         }
1496 }
1497
1498 #define TRUNC_HACK_SIZE (200ULL << 9)   /* 512-byte aligned for krbd */
1499
1500 void
1501 check_trunc_hack(void)
1502 {
1503         uint64_t size;
1504         int ret;
1505
1506         ret = ops->resize(&ctx, 0ULL);
1507         if (ret < 0)
1508                 prterrcode("check_trunc_hack: ops->resize pre", ret);
1509
1510         ret = ops->resize(&ctx, TRUNC_HACK_SIZE);
1511         if (ret < 0)
1512                 prterrcode("check_trunc_hack: ops->resize actual", ret);
1513
1514         ret = ops->get_size(&ctx, &size);
1515         if (ret < 0)
1516                 prterrcode("check_trunc_hack: ops->get_size", ret);
1517
1518         if (size != TRUNC_HACK_SIZE) {
1519                 prt("no extend on truncate! not posix!\n");
1520                 exit(130);
1521         }
1522
1523         ret = ops->resize(&ctx, 0ULL);
1524         if (ret < 0)
1525                 prterrcode("check_trunc_hack: ops->resize post", ret);
1526 }
1527
1528 int
1529 create_image()
1530 {
1531         int r;
1532         int order = 0;
1533         char buf[32];
1534
1535         r = rados_create(&cluster, NULL);
1536         if (r < 0) {
1537                 simple_err("Could not create cluster handle", r);
1538                 return r;
1539         }
1540         rados_conf_parse_env(cluster, NULL);
1541         r = rados_conf_read_file(cluster, NULL);
1542         if (r < 0) {
1543                 simple_err("Error reading ceph config file", r);
1544                 goto failed_shutdown;
1545         }
1546         r = rados_connect(cluster);
1547         if (r < 0) {
1548                 simple_err("Error connecting to cluster", r);
1549                 goto failed_shutdown;
1550         }
1551         r = krbd_create_from_context(rados_cct(cluster), &krbd);
1552         if (r < 0) {
1553                 simple_err("Could not create libkrbd handle", r);
1554                 goto failed_shutdown;
1555         }
1556
1557         r = rados_pool_create(cluster, pool);
1558         if (r < 0 && r != -EEXIST) {
1559                 simple_err("Error creating pool", r);
1560                 goto failed_krbd;
1561         }
1562         r = rados_ioctx_create(cluster, pool, &ioctx);
1563         if (r < 0) {
1564                 simple_err("Error creating ioctx", r);
1565                 goto failed_krbd;
1566         }
1567         rados_application_enable(ioctx, "rbd", 1);
1568
1569         if (clone_calls || journal_replay) {
1570                 uint64_t features = 0;
1571                 if (clone_calls) {
1572                         features |= RBD_FEATURE_LAYERING;
1573                 }
1574                 if (journal_replay) {
1575                         features |= (RBD_FEATURE_EXCLUSIVE_LOCK |
1576                                      RBD_FEATURE_JOURNALING);
1577                 }
1578                 r = rbd_create2(ioctx, iname, 0, features, &order);
1579         } else {
1580                 r = rbd_create(ioctx, iname, 0, &order);
1581         }
1582         if (r < 0) {
1583                 simple_err("Error creating image", r);
1584                 goto failed_open;
1585         }
1586
1587         if (journal_replay) {
1588                 r = register_journal(ioctx, iname);
1589                 if (r < 0) {
1590                         goto failed_open;
1591                 }
1592         }
1593
1594         r = rados_conf_get(cluster, "rbd_skip_partial_discard", buf,
1595                            sizeof(buf));
1596         if (r < 0) {
1597                 simple_err("Could not get rbd_skip_partial_discard value", r);
1598                 goto failed_open;
1599         }
1600         skip_partial_discard = (strcmp(buf, "true") == 0);
1601
1602         return 0;
1603
1604  failed_open:
1605         rados_ioctx_destroy(ioctx);
1606  failed_krbd:
1607         krbd_destroy(krbd);
1608  failed_shutdown:
1609         rados_shutdown(cluster);
1610         return r;
1611 }
1612
1613 void
1614 doflush(unsigned offset, unsigned size)
1615 {
1616         int ret;
1617
1618         if (o_direct)
1619                 return;
1620
1621         ret = ops->flush(&ctx);
1622         if (ret < 0)
1623                 prterrcode("doflush: ops->flush", ret);
1624 }
1625
1626 void
1627 doread(unsigned offset, unsigned size)
1628 {
1629         int ret;
1630
1631         offset -= offset % readbdy;
1632         if (o_direct)
1633                 size -= size % readbdy;
1634         if (size == 0) {
1635                 if (!quiet && testcalls > simulatedopcount && !o_direct)
1636                         prt("skipping zero size read\n");
1637                 log4(OP_SKIPPED, OP_READ, offset, size);
1638                 return;
1639         }
1640         if (size + offset > file_size) {
1641                 if (!quiet && testcalls > simulatedopcount)
1642                         prt("skipping seek/read past end of file\n");
1643                 log4(OP_SKIPPED, OP_READ, offset, size);
1644                 return;
1645         }
1646
1647         log4(OP_READ, offset, size, 0);
1648
1649         if (testcalls <= simulatedopcount)
1650                 return;
1651
1652         if (!quiet &&
1653                 ((progressinterval && testcalls % progressinterval == 0)  ||
1654                 (debug &&
1655                        (monitorstart == -1 ||
1656                         (static_cast<long>(offset + size) > monitorstart &&
1657                          (monitorend == -1 ||
1658                           static_cast<long>(offset) <= monitorend))))))
1659                 prt("%lu read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
1660                     offset, offset + size - 1, size);
1661
1662         ret = ops->read(&ctx, offset, size, temp_buf);
1663         if (ret != (int)size) {
1664                 if (ret < 0)
1665                         prterrcode("doread: ops->read", ret);
1666                 else
1667                         prt("short read: 0x%x bytes instead of 0x%x\n",
1668                             ret, size);
1669                 report_failure(141);
1670         }
1671
1672         check_buffers(good_buf, temp_buf, offset, size);
1673 }
1674
1675
1676 void
1677 check_eofpage(char *s, unsigned offset, char *p, int size)
1678 {
1679         unsigned long last_page, should_be_zero;
1680
1681         if (offset + size <= (file_size & ~page_mask))
1682                 return;
1683         /*
1684          * we landed in the last page of the file
1685          * test to make sure the VM system provided 0's 
1686          * beyond the true end of the file mapping
1687          * (as required by mmap def in 1996 posix 1003.1)
1688          */
1689         last_page = ((unsigned long)p + (offset & page_mask) + size) & ~page_mask;
1690
1691         for (should_be_zero = last_page + (file_size & page_mask);
1692              should_be_zero < last_page + page_size;
1693              should_be_zero++)
1694                 if (*(char *)should_be_zero) {
1695                         prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%x is 0x%04x\n",
1696                             s, file_size - 1, should_be_zero & page_mask,
1697                             short_at(should_be_zero));
1698                         report_failure(205);
1699                 }
1700 }
1701
1702
1703 void
1704 gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
1705 {
1706         while (size--) {
1707                 good_buf[offset] = testcalls % 256; 
1708                 if (offset % 2)
1709                         good_buf[offset] += original_buf[offset];
1710                 offset++;
1711         }
1712 }
1713
1714
1715 void
1716 dowrite(unsigned offset, unsigned size)
1717 {
1718         ssize_t ret;
1719         off_t newsize;
1720
1721         offset -= offset % writebdy;
1722         if (o_direct)
1723                 size -= size % writebdy;
1724         if (size == 0) {
1725                 if (!quiet && testcalls > simulatedopcount && !o_direct)
1726                         prt("skipping zero size write\n");
1727                 log4(OP_SKIPPED, OP_WRITE, offset, size);
1728                 return;
1729         }
1730
1731         log4(OP_WRITE, offset, size, file_size);
1732
1733         gendata(original_buf, good_buf, offset, size);
1734         if (file_size < offset + size) {
1735                 newsize = ceil(((double)offset + size) / truncbdy) * truncbdy;
1736                 if (file_size < newsize)
1737                         memset(good_buf + file_size, '\0', newsize - file_size);
1738                 file_size = newsize;
1739                 if (lite) {
1740                         warn("Lite file size bug in fsx!");
1741                         report_failure(149);
1742                 }
1743                 ret = ops->resize(&ctx, newsize);
1744                 if (ret < 0) {
1745                         prterrcode("dowrite: ops->resize", ret);
1746                         report_failure(150);
1747                 }
1748         }
1749
1750         if (testcalls <= simulatedopcount)
1751                 return;
1752
1753         if (!quiet &&
1754                 ((progressinterval && testcalls % progressinterval == 0) ||
1755                        (debug &&
1756                        (monitorstart == -1 ||
1757                         (static_cast<long>(offset + size) > monitorstart &&
1758                          (monitorend == -1 ||
1759                           static_cast<long>(offset) <= monitorend))))))
1760                 prt("%lu write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
1761                     offset, offset + size - 1, size);
1762
1763         ret = ops->write(&ctx, offset, size, good_buf + offset);
1764         if (ret != (ssize_t)size) {
1765                 if (ret < 0)
1766                         prterrcode("dowrite: ops->write", ret);
1767                 else
1768                         prt("short write: 0x%x bytes instead of 0x%x\n",
1769                             ret, size);
1770                 report_failure(151);
1771         }
1772
1773         if (flush_enabled)
1774                 doflush(offset, size);
1775 }
1776
1777
1778 void
1779 dotruncate(unsigned size)
1780 {
1781         int oldsize = file_size;
1782         int ret;
1783
1784         size -= size % truncbdy;
1785         if (size > biggest) {
1786                 biggest = size;
1787                 if (!quiet && testcalls > simulatedopcount)
1788                         prt("truncating to largest ever: 0x%x\n", size);
1789         }
1790
1791         log4(OP_TRUNCATE, size, (unsigned)file_size, 0);
1792
1793         if (size > file_size)
1794                 memset(good_buf + file_size, '\0', size - file_size);
1795         else if (size < file_size)
1796                 memset(good_buf + size, '\0', file_size - size);
1797         file_size = size;
1798
1799         if (testcalls <= simulatedopcount)
1800                 return;
1801
1802         if ((progressinterval && testcalls % progressinterval == 0) ||
1803             (debug && (monitorstart == -1 || monitorend == -1 ||
1804                        (long)size <= monitorend)))
1805                 prt("%lu trunc\tfrom 0x%x to 0x%x\n", testcalls, oldsize, size);
1806
1807         ret = ops->resize(&ctx, size);
1808         if (ret < 0) {
1809                 prterrcode("dotruncate: ops->resize", ret);
1810                 report_failure(160);
1811         }
1812 }
1813
1814 void
1815 do_punch_hole(unsigned offset, unsigned length)
1816 {
1817         unsigned end_offset;
1818         int max_offset = 0;
1819         int max_len = 0;
1820         int ret;
1821
1822         offset -= offset % holebdy;
1823         length -= length % holebdy;
1824         if (length == 0) {
1825                 if (!quiet && testcalls > simulatedopcount)
1826                         prt("skipping zero length punch hole\n");
1827                 log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
1828                 return;
1829         }
1830
1831         if (file_size <= (loff_t)offset) {
1832                 if (!quiet && testcalls > simulatedopcount)
1833                         prt("skipping hole punch off the end of the file\n");
1834                 log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
1835                 return;
1836         }
1837
1838         end_offset = offset + length;
1839
1840         log4(OP_PUNCH_HOLE, offset, length, 0);
1841
1842         if (testcalls <= simulatedopcount)
1843                 return;
1844
1845         if ((progressinterval && testcalls % progressinterval == 0) ||
1846             (debug && (monitorstart == -1 || monitorend == -1 ||
1847                        (long)end_offset <= monitorend))) {
1848                 prt("%lu punch\tfrom 0x%x to 0x%x, (0x%x bytes)\n", testcalls,
1849                         offset, offset+length, length);
1850         }
1851
1852         ret = ops->discard(&ctx, (unsigned long long)offset,
1853                            (unsigned long long)length);
1854         if (ret < 0) {
1855                 prterrcode("do_punch_hole: ops->discard", ret);
1856                 report_failure(161);
1857         }
1858
1859         max_offset = offset < file_size ? offset : file_size;
1860         max_len = max_offset + length <= file_size ? length :
1861                         file_size - max_offset;
1862         memset(good_buf + max_offset, '\0', max_len);
1863 }
1864
1865 unsigned get_data_size(unsigned size)
1866 {
1867         unsigned i;
1868         unsigned hint;
1869         unsigned max = sqrt((double)size) + 1;
1870         unsigned good = 1;
1871         unsigned curr = good;
1872
1873         hint = get_random() % max;
1874
1875         for (i = 1; i < max && curr < hint; i++) {
1876                 if (size % i == 0) {
1877                         good = curr;
1878                         curr = i;
1879                 }
1880         }
1881
1882         if (curr == hint)
1883                 good = curr;
1884
1885         return good;
1886 }
1887
1888 void
1889 dowritesame(unsigned offset, unsigned size)
1890 {
1891         ssize_t ret;
1892         off_t newsize;
1893         unsigned buf_off;
1894         unsigned data_size;
1895         int n;
1896
1897         offset -= offset % writebdy;
1898         if (o_direct)
1899                 size -= size % writebdy;
1900         if (size == 0) {
1901                 if (!quiet && testcalls > simulatedopcount && !o_direct)
1902                         prt("skipping zero size writesame\n");
1903                 log4(OP_SKIPPED, OP_WRITESAME, offset, size);
1904                 return;
1905         }
1906
1907         data_size = get_data_size(size);
1908
1909         log4(OP_WRITESAME, offset, size, data_size);
1910
1911         gendata(original_buf, good_buf, offset, data_size);
1912         if (file_size < offset + size) {
1913                 newsize = ceil(((double)offset + size) / truncbdy) * truncbdy;
1914                 if (file_size < newsize)
1915                         memset(good_buf + file_size, '\0', newsize - file_size);
1916                 file_size = newsize;
1917                 if (lite) {
1918                         warn("Lite file size bug in fsx!");
1919                         report_failure(162);
1920                 }
1921                 ret = ops->resize(&ctx, newsize);
1922                 if (ret < 0) {
1923                         prterrcode("dowritesame: ops->resize", ret);
1924                         report_failure(163);
1925                 }
1926         }
1927
1928         for (n = size / data_size, buf_off = data_size; n > 1; n--) {
1929                 memcpy(good_buf + offset + buf_off, good_buf + offset, data_size);
1930                 buf_off += data_size;
1931         }
1932
1933         if (testcalls <= simulatedopcount)
1934                 return;
1935
1936         if (!quiet &&
1937                 ((progressinterval && testcalls % progressinterval == 0) ||
1938                        (debug &&
1939                        (monitorstart == -1 ||
1940                         (static_cast<long>(offset + size) > monitorstart &&
1941                          (monitorend == -1 ||
1942                           static_cast<long>(offset) <= monitorend))))))
1943                 prt("%lu writesame\t0x%x thru\t0x%x\tdata_size\t0x%x(0x%x bytes)\n", testcalls,
1944                     offset, offset + size - 1, data_size, size);
1945
1946         ret = ops->writesame(&ctx, offset, size, good_buf + offset, data_size);
1947         if (ret != (ssize_t)size) {
1948                 if (ret < 0)
1949                         prterrcode("dowritesame: ops->writesame", ret);
1950                 else
1951                         prt("short writesame: 0x%x bytes instead of 0x%x\n",
1952                             ret, size);
1953                 report_failure(164);
1954         }
1955
1956         if (flush_enabled)
1957                 doflush(offset, size);
1958 }
1959
1960 void
1961 docompareandwrite(unsigned offset, unsigned size)
1962 {
1963         int ret;
1964
1965         offset -= offset % writebdy;
1966         if (o_direct)
1967                 size -= size % writebdy;
1968
1969         if (size == 0) {
1970                 if (!quiet && testcalls > simulatedopcount && !o_direct)
1971                         prt("skipping zero size read\n");
1972                 log4(OP_SKIPPED, OP_READ, offset, size);
1973                 return;
1974         }
1975
1976         if (size + offset > file_size) {
1977                 if (!quiet && testcalls > simulatedopcount)
1978                         prt("skipping seek/compare past end of file\n");
1979                 log4(OP_SKIPPED, OP_COMPARE_AND_WRITE, offset, size);
1980                 return;
1981         }
1982
1983         memcpy(temp_buf + offset, good_buf + offset, size);
1984         gendata(original_buf, good_buf, offset, size);
1985         log4(OP_COMPARE_AND_WRITE, offset, size, 0);
1986
1987         if (testcalls <= simulatedopcount)
1988                 return;
1989
1990         if (!quiet &&
1991                 ((progressinterval && testcalls % progressinterval == 0) ||
1992                        (debug &&
1993                        (monitorstart == -1 ||
1994                         (static_cast<long>(offset + size) > monitorstart &&
1995                          (monitorend == -1 ||
1996                           static_cast<long>(offset) <= monitorend))))))
1997                 prt("%lu compareandwrite\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
1998                     offset, offset + size - 1, size);
1999
2000         ret = ops->compare_and_write(&ctx, offset, size, temp_buf + offset,
2001                                      good_buf + offset);
2002         if (ret != (ssize_t)size) {
2003                 if (ret == -EINVAL) {
2004                         memcpy(good_buf + offset, temp_buf + offset, size);
2005                         return;
2006                 }
2007                 if (ret < 0)
2008                         prterrcode("docompareandwrite: ops->compare_and_write", ret);
2009                 else
2010                         prt("short write: 0x%x bytes instead of 0x%x\n", ret, size);
2011                 report_failure(151);
2012                 return;
2013         }
2014
2015         if (flush_enabled)
2016                 doflush(offset, size);
2017 }
2018
2019 void clone_filename(char *buf, size_t len, int clones)
2020 {
2021         snprintf(buf, len, "%s/fsx-%s-parent%d",
2022                  dirpath, iname, clones);
2023 }
2024
2025 void clone_imagename(char *buf, size_t len, int clones)
2026 {
2027         if (clones > 0)
2028                 snprintf(buf, len, "%s-clone%d", iname, clones);
2029         else
2030                 strncpy(buf, iname, len);
2031         buf[len - 1] = '\0';
2032 }
2033
2034 void replay_imagename(char *buf, size_t len, int clones)
2035 {
2036         clone_imagename(buf, len, clones);
2037         strncat(buf, "-replay", len - strlen(buf));
2038         buf[len - 1] = '\0';
2039 }
2040
2041 void check_clone(int clonenum, bool replay_image);
2042
2043 void
2044 do_clone()
2045 {
2046         char filename[1024];
2047         char imagename[1024];
2048         char lastimagename[1024];
2049         int ret, fd;
2050         int order = 0, stripe_unit = 0, stripe_count = 0;
2051         uint64_t newsize = file_size;
2052
2053         log4(OP_CLONE, 0, 0, 0);
2054         ++num_clones;
2055
2056         if (randomize_striping) {
2057                 order = 18 + get_random() % 8;
2058                 stripe_unit = 1ull << (order - 1 - (get_random() % 8));
2059                 stripe_count = 2 + get_random() % 14;
2060         }
2061
2062         prt("%lu clone\t%d order %d su %d sc %d\n", testcalls, num_clones,
2063             order, stripe_unit, stripe_count);
2064
2065         clone_imagename(imagename, sizeof(imagename), num_clones);
2066         clone_imagename(lastimagename, sizeof(lastimagename),
2067                         num_clones - 1);
2068         assert(strcmp(lastimagename, ctx.name) == 0);
2069
2070         ret = ops->clone(&ctx, "snap", imagename, &order, stripe_unit,
2071                          stripe_count);
2072         if (ret < 0) {
2073                 prterrcode("do_clone: ops->clone", ret);
2074                 exit(165);
2075         }
2076
2077         if (randomize_parent_overlap && rbd_image_has_parent(&ctx)) {
2078                 int rand = get_random() % 16 + 1; // [1..16]
2079
2080                 if (rand < 13) {
2081                         uint64_t overlap;
2082
2083                         ret = rbd_get_overlap(ctx.image, &overlap);
2084                         if (ret < 0) {
2085                                 prterrcode("do_clone: rbd_get_overlap", ret);
2086                                 exit(1);
2087                         }
2088
2089                         if (rand < 10) {        // 9/16
2090                                 newsize = overlap * ((double)rand / 10);
2091                                 newsize -= newsize % truncbdy;
2092                         } else {                // 3/16
2093                                 newsize = 0;
2094                         }
2095
2096                         assert(newsize != (uint64_t)file_size);
2097                         prt("truncating image %s from 0x%llx (overlap 0x%llx) to 0x%llx\n",
2098                             ctx.name, file_size, overlap, newsize);
2099
2100                         ret = ops->resize(&ctx, newsize);
2101                         if (ret < 0) {
2102                                 prterrcode("do_clone: ops->resize", ret);
2103                                 exit(1);
2104                         }
2105                 } else if (rand < 15) {         // 2/16
2106                         prt("flattening image %s\n", ctx.name);
2107
2108                         ret = ops->flatten(&ctx);
2109                         if (ret < 0) {
2110                                 prterrcode("do_clone: ops->flatten", ret);
2111                                 exit(1);
2112                         }
2113                 } else {                        // 2/16
2114                         prt("leaving image %s intact\n", ctx.name);
2115                 }
2116         }
2117
2118         clone_filename(filename, sizeof(filename), num_clones);
2119         if ((fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0) {
2120                 simple_err("do_clone: open", -errno);
2121                 exit(162);
2122         }
2123         save_buffer(good_buf, newsize, fd);
2124         if ((ret = close(fd)) < 0) {
2125                 simple_err("do_clone: close", -errno);
2126                 exit(163);
2127         }
2128
2129         /*
2130          * Close parent.
2131          */
2132         if ((ret = ops->close(&ctx)) < 0) {
2133                 prterrcode("do_clone: ops->close", ret);
2134                 exit(174);
2135         }
2136
2137         if (journal_replay) {
2138                 ret = finalize_journal(ioctx, lastimagename, num_clones - 1,
2139                                        order, stripe_unit, stripe_count);
2140                 if (ret < 0) {
2141                         exit(EXIT_FAILURE);
2142                 }
2143
2144                 ret = register_journal(ioctx, imagename);
2145                 if (ret < 0) {
2146                         exit(EXIT_FAILURE);
2147                 }
2148         }
2149
2150         /*
2151          * Open freshly made clone.
2152          */
2153         if ((ret = ops->open(imagename, &ctx)) < 0) {
2154                 prterrcode("do_clone: ops->open", ret);
2155                 exit(166);
2156         }
2157
2158         if (num_clones > 1) {
2159                 if (journal_replay) {
2160                         check_clone(num_clones - 2, true);
2161                 }
2162                 check_clone(num_clones - 2, false);
2163         }
2164 }
2165
2166 void
2167 check_clone(int clonenum, bool replay_image)
2168 {
2169         char filename[128];
2170         char imagename[128];
2171         int ret, fd;
2172         struct rbd_ctx cur_ctx = RBD_CTX_INIT;
2173         struct stat file_info;
2174         char *good_buf, *temp_buf;
2175
2176         if (replay_image) {
2177                 replay_imagename(imagename, sizeof(imagename), clonenum);
2178         } else {
2179                 clone_imagename(imagename, sizeof(imagename), clonenum);
2180         }
2181
2182         if ((ret = ops->open(imagename, &cur_ctx)) < 0) {
2183                 prterrcode("check_clone: ops->open", ret);
2184                 exit(167);
2185         }
2186
2187         clone_filename(filename, sizeof(filename), clonenum + 1);
2188         if ((fd = open(filename, O_RDONLY)) < 0) {
2189                 simple_err("check_clone: open", -errno);
2190                 exit(168);
2191         }
2192
2193         prt("checking clone #%d, image %s against file %s\n",
2194             clonenum, imagename, filename);
2195         if ((ret = fstat(fd, &file_info)) < 0) {
2196                 simple_err("check_clone: fstat", -errno);
2197                 exit(169);
2198         }
2199
2200         good_buf = NULL;
2201         ret = posix_memalign((void **)&good_buf,
2202                              MAX(writebdy, (int)sizeof(void *)),
2203                              file_info.st_size);
2204         if (ret > 0) {
2205                 prterrcode("check_clone: posix_memalign(good_buf)", -ret);
2206                 exit(96);
2207         }
2208
2209         temp_buf = NULL;
2210         ret = posix_memalign((void **)&temp_buf,
2211                              MAX(readbdy, (int)sizeof(void *)),
2212                              file_info.st_size);
2213         if (ret > 0) {
2214                 prterrcode("check_clone: posix_memalign(temp_buf)", -ret);
2215                 exit(97);
2216         }
2217
2218         if ((ret = pread(fd, good_buf, file_info.st_size, 0)) < 0) {
2219                 simple_err("check_clone: pread", -errno);
2220                 exit(170);
2221         }
2222         if ((ret = ops->read(&cur_ctx, 0, file_info.st_size, temp_buf)) < 0) {
2223                 prterrcode("check_clone: ops->read", ret);
2224                 exit(171);
2225         }
2226         close(fd);
2227         if ((ret = ops->close(&cur_ctx)) < 0) {
2228                 prterrcode("check_clone: ops->close", ret);
2229                 exit(174);
2230         }
2231         check_buffers(good_buf, temp_buf, 0, file_info.st_size);
2232
2233         if (!replay_image) {
2234                 unlink(filename);
2235         }
2236
2237         free(good_buf);
2238         free(temp_buf);
2239 }
2240
2241 void
2242 writefileimage()
2243 {
2244         ssize_t ret;
2245
2246         ret = ops->write(&ctx, 0, file_size, good_buf);
2247         if (ret != file_size) {
2248                 if (ret < 0)
2249                         prterrcode("writefileimage: ops->write", ret);
2250                 else
2251                         prt("short write: 0x%x bytes instead of 0x%llx\n",
2252                             ret, (unsigned long long)file_size);
2253                 report_failure(172);
2254         }
2255
2256         if (!lite) {
2257                 ret = ops->resize(&ctx, file_size);
2258                 if (ret < 0) {
2259                         prterrcode("writefileimage: ops->resize", ret);
2260                         report_failure(173);
2261                 }
2262         }
2263 }
2264
2265 void
2266 do_flatten()
2267 {
2268         int ret;
2269
2270         if (!rbd_image_has_parent(&ctx)) {
2271                 log4(OP_SKIPPED, OP_FLATTEN, 0, 0);
2272                 return;
2273         }
2274         log4(OP_FLATTEN, 0, 0, 0);
2275         prt("%lu flatten\n", testcalls);
2276
2277         ret = ops->flatten(&ctx);
2278         if (ret < 0) {
2279                 prterrcode("writefileimage: ops->flatten", ret);
2280                 exit(177);
2281         }
2282 }
2283
2284 void
2285 docloseopen(void)
2286 {
2287         char *name;
2288         int ret;
2289
2290         if (testcalls <= simulatedopcount)
2291                 return;
2292
2293         name = strdup(ctx.name);
2294
2295         if (debug)
2296                 prt("%lu close/open\n", testcalls);
2297
2298         ret = ops->close(&ctx);
2299         if (ret < 0) {
2300                 prterrcode("docloseopen: ops->close", ret);
2301                 report_failure(180);
2302         }
2303
2304         ret = ops->open(name, &ctx);
2305         if (ret < 0) {
2306                 prterrcode("docloseopen: ops->open", ret);
2307                 report_failure(181);
2308         }
2309
2310         free(name);
2311 }
2312
/*
 * Fit an (off, len) pair into an object of `size` bytes: off is reduced
 * modulo size (or set to 0 when size is 0), and len is shortened so that
 * off + len does not exceed size.  off and len must be modifiable lvalues;
 * every argument is evaluated more than once.
 */
#define TRIM_OFF_LEN(off, len, size)    \
do {                                    \
        if (!(size))                    \
                (off) = 0;              \
        else                            \
                (off) %= (size);        \
        if ((unsigned)(off) + (unsigned)(len) > (unsigned)(size)) {     \
                (len) = (size) - (off); \
        }                               \
} while (0)
2322
2323 void
2324 test(void)
2325 {
2326         unsigned long   offset;
2327         unsigned long   size = maxoplen;
2328         unsigned long   rv = get_random();
2329         unsigned long   op;
2330
2331         if (simulatedopcount > 0 && testcalls == simulatedopcount)
2332                 writefileimage();
2333
2334         testcalls++;
2335
2336         if (closeprob)
2337                 closeopen = (rv >> 3) < (1u << 28) / (unsigned)closeprob;
2338
2339         if (debugstart > 0 && testcalls >= debugstart)
2340                 debug = 1;
2341
2342         if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
2343                 prt("%lu...\n", testcalls);
2344
2345         offset = get_random();
2346         if (randomoplen)
2347                 size = get_random() % (maxoplen + 1);
2348
2349         /* calculate appropriate op to run */
2350         if (lite)
2351                 op = rv % OP_MAX_LITE;
2352         else
2353                 op = rv % OP_MAX_FULL;
2354
2355         switch (op) {
2356         case OP_MAPREAD:
2357                 if (!mapped_reads)
2358                         op = OP_READ;
2359                 break;
2360         case OP_MAPWRITE:
2361                 if (!mapped_writes)
2362                         op = OP_WRITE;
2363                 break;
2364         case OP_FALLOCATE:
2365                 if (!fallocate_calls) {
2366                         log4(OP_SKIPPED, OP_FALLOCATE, offset, size);
2367                         goto out;
2368                 }
2369                 break;
2370         case OP_PUNCH_HOLE:
2371                 if (!punch_hole_calls) {
2372                         log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, size);
2373                         goto out;
2374                 }
2375                 break;
2376         case OP_CLONE:
2377                 /* clone, 8% chance */
2378                 if (!clone_calls || file_size == 0 || get_random() % 100 >= 8) {
2379                         log4(OP_SKIPPED, OP_CLONE, 0, 0);
2380                         goto out;
2381                 }
2382                 break;
2383         case OP_FLATTEN:
2384                 /* flatten four times as rarely as clone, 2% chance */
2385                 if (get_random() % 100 >= 2) {
2386                         log4(OP_SKIPPED, OP_FLATTEN, 0, 0);
2387                         goto out;
2388                 }
2389                 break;
2390         case OP_WRITESAME:
2391                 /* writesame not implemented */
2392                 if (!ops->writesame) {
2393                         log4(OP_SKIPPED, OP_WRITESAME, offset, size);
2394                         goto out;
2395                 }
2396                 break;
2397         case OP_COMPARE_AND_WRITE:
2398                 /* compare_and_write not implemented */
2399                 if (!ops->compare_and_write) {
2400                         log4(OP_SKIPPED, OP_COMPARE_AND_WRITE, offset, size);
2401                         goto out;
2402                 }
2403                 break;
2404         }
2405
2406         switch (op) {
2407         case OP_READ:
2408                 TRIM_OFF_LEN(offset, size, file_size);
2409                 doread(offset, size);
2410                 break;
2411
2412         case OP_WRITE:
2413                 TRIM_OFF_LEN(offset, size, maxfilelen);
2414                 dowrite(offset, size);
2415                 break;
2416
2417         case OP_MAPREAD:
2418                 TRIM_OFF_LEN(offset, size, file_size);
2419                 exit(183);
2420                 break;
2421
2422         case OP_MAPWRITE:
2423                 TRIM_OFF_LEN(offset, size, maxfilelen);
2424                 exit(182);
2425                 break;
2426
2427         case OP_TRUNCATE:
2428                 if (!style)
2429                         size = get_random() % maxfilelen;
2430                 dotruncate(size);
2431                 break;
2432
2433         case OP_PUNCH_HOLE:
2434                 TRIM_OFF_LEN(offset, size, file_size);
2435                 do_punch_hole(offset, size);
2436                 break;
2437
2438         case OP_WRITESAME:
2439                 TRIM_OFF_LEN(offset, size, maxfilelen);
2440                 dowritesame(offset, size);
2441                 break;
2442         case OP_COMPARE_AND_WRITE:
2443                 TRIM_OFF_LEN(offset, size, file_size);
2444                 docompareandwrite(offset, size);
2445                 break;
2446
2447         case OP_CLONE:
2448                 do_clone();
2449                 break;
2450
2451         case OP_FLATTEN:
2452                 do_flatten();
2453                 break;
2454
2455         default:
2456                 prterr("test: unknown operation");
2457                 report_failure(42);
2458                 break;
2459         }
2460
2461 out:
2462         if (sizechecks && testcalls > simulatedopcount)
2463                 check_size();
2464         if (closeopen)
2465                 docloseopen();
2466 }
2467
2468
2469 void
2470 cleanup(int sig)
2471 {
2472         if (sig)
2473                 prt("signal %d\n", sig);
2474         prt("testcalls = %lu\n", testcalls);
2475         exit(sig);
2476 }
2477
2478
/*
 * Print the command-line help to stdout and exit with status 89.
 * The flag list in the synopsis mirrors the argument-less options that
 * main() hands to getopt().
 */
void
usage(void)
{
        fprintf(stdout, "usage: %s",
                "fsx [-dfjknqxyCFHKLMORUWZ] [-b opnum] [-c Prob] [-h holebdy] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] pname iname\n\
        -b opnum: beginning operation number (default 1)\n\
        -c P: 1 in P chance of file close+open at each op (default infinity)\n\
        -d: debug output for all operations\n\
        -f: flush and invalidate cache after I/O\n\
        -h holebdy: 4096 would make discards page aligned (default 1)\n\
        -j: journal replay stress test\n\
        -k: keep data on success (default 0)\n\
        -l flen: the upper bound on file size (default 262144)\n\
        -m startop:endop: monitor (print debug output) specified byte range (default 0:infinity)\n\
        -n: no verifications of file size\n\
        -o oplen: the upper bound on operation size (default 65536)\n\
        -p progressinterval: debug output at specified operation interval\n\
        -q: quieter operation\n\
        -r readbdy: 4096 would make reads page aligned (default 1)\n\
        -s style: 1 gives smaller truncates (default 0)\n\
        -t truncbdy: 4096 would make truncates page aligned (default 1)\n\
        -w writebdy: 4096 would make writes page aligned (default 1)\n\
        -x: preallocate file space before starting, XFS only (default 0)\n\
        -y: synchronize changes to a file\n"

"       -C: do not use clone calls\n\
        -D startingop: debug output starting at specified operation\n"
#ifdef FALLOCATE
"       -F: Do not use fallocate (preallocation) calls\n"
#endif
"       -H: do not use punch hole calls\n\
        -K: enable krbd mode (use -t and -h too)\n\
        -M: enable rbd-nbd mode (use -t and -h too)\n\
        -L: fsxLite - no file creations & no file size changes\n\
        -N numops: total # operations to do (default infinity)\n\
        -O: use oplen (see -o flag) for every op (default random)\n\
        -P dirpath: save .fsxlog and .fsxgood files in dirpath (default ./)\n\
        -R: read() system calls only (mapped reads disabled)\n\
        -S seed: for random # generator (default 1) 0 gets timestamp\n\
        -U: disable randomized striping\n\
        -W: mapped write operations DISabled\n\
        -Z: O_DIRECT (use -R, -W, -r and -w too)\n\
        poolname: this is REQUIRED (no default)\n\
        imagename: this is REQUIRED (no default)\n");
        exit(89);
}
2525
2526
/*
 * Parse a number with strtol() (base 0, so decimal, octal and hex are
 * accepted) and apply an optional one-letter multiplier that directly
 * follows it:
 *
 *      b/B  x 512      k/K  x 1024     m/M  x 1024*1024        w/W  x 4
 *
 * On return *e points just past the number and any multiplier consumed.
 * The result is narrowed to int; no range checking is done.
 */
int
getnum(char *s, char **e)
{
        int value;
        int scale = 0;

        *e = (char *) 0;
        value = strtol(s, e, 0);
        if (!*e)
                return (value);

        switch (**e) {
        case 'b':
        case 'B':
                scale = 512;
                break;
        case 'k':
        case 'K':
                scale = 1024;
                break;
        case 'm':
        case 'M':
                scale = 1024*1024;
                break;
        case 'w':
        case 'W':
                scale = 4;
                break;
        }

        /* a recognised suffix scales the value and is consumed */
        if (scale != 0) {
                value *= scale;
                *e = *e + 1;
        }
        return (value);
}
2559
2560 void
2561 test_fallocate()
2562 {
2563 #ifdef FALLOCATE
2564         if (!lite && fallocate_calls) {
2565                 if (fallocate(fd, 0, 0, 1) && errno == EOPNOTSUPP) {
2566                         if(!quiet)
2567                                 warn("main: filesystem does not support fallocate, disabling\n");
2568                         fallocate_calls = 0;
2569                 } else {
2570                         ftruncate(fd, 0);
2571                 }
2572         }
2573 #else /* ! FALLOCATE */
2574         fallocate_calls = 0;
2575 #endif
2576
2577 }
2578
2579 void remove_image(rados_ioctx_t ioctx, char *imagename, bool remove_snap,
2580                   bool unregister) {
2581         rbd_image_t image;
2582         char errmsg[128];
2583         int ret;
2584
2585         if ((ret = rbd_open(ioctx, imagename, &image, NULL)) < 0) {
2586                 sprintf(errmsg, "rbd_open %s", imagename);
2587                 prterrcode(errmsg, ret);
2588                 report_failure(101);
2589         }
2590         if (remove_snap) {
2591                 if ((ret = rbd_snap_unprotect(image, "snap")) < 0) {
2592                         sprintf(errmsg, "rbd_snap_unprotect %s@snap",
2593                                 imagename);
2594                         prterrcode(errmsg, ret);
2595                         report_failure(102);
2596                 }
2597                 if ((ret = rbd_snap_remove(image, "snap")) < 0) {
2598                         sprintf(errmsg, "rbd_snap_remove %s@snap",
2599                                 imagename);
2600                         prterrcode(errmsg, ret);
2601                         report_failure(103);
2602                 }
2603         }
2604         if ((ret = rbd_close(image)) < 0) {
2605                 sprintf(errmsg, "rbd_close %s", imagename);
2606                 prterrcode(errmsg, ret);
2607                 report_failure(104);
2608         }
2609
2610         if (unregister &&
2611             (ret = unregister_journal(ioctx, imagename)) < 0) {
2612                 report_failure(105);
2613         }
2614
2615         if ((ret = rbd_remove(ioctx, imagename)) < 0) {
2616                 sprintf(errmsg, "rbd_remove %s", imagename);
2617                 prterrcode(errmsg, ret);
2618                 report_failure(106);
2619         }
2620 }
2621
2622 int
2623 main(int argc, char **argv)
2624 {
2625         int     i, style, ch, ret;
2626         char    *endp;
2627         char goodfile[1024];
2628         char logfile[1024];
2629
2630         goodfile[0] = 0;
2631         logfile[0] = 0;
2632
2633         page_size = getpagesize();
2634         page_mask = page_size - 1;
2635         mmap_mask = page_mask;
2636
2637         setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
2638
2639         while ((ch = getopt(argc, argv, "b:c:dfh:jkl:m:no:p:qr:s:t:w:xyCD:FHKMLN:OP:RS:UWZ"))
2640                != EOF)
2641                 switch (ch) {
2642                 case 'b':
2643                         simulatedopcount = getnum(optarg, &endp);
2644                         if (!quiet)
2645                                 fprintf(stdout, "Will begin at operation %lu\n",
2646                                         simulatedopcount);
2647                         if (simulatedopcount == 0)
2648                                 usage();
2649                         simulatedopcount -= 1;
2650                         break;
2651                 case 'c':
2652                         closeprob = getnum(optarg, &endp);
2653                         if (!quiet)
2654                                 fprintf(stdout,
2655                                         "Chance of close/open is 1 in %d\n",
2656                                         closeprob);
2657                         if (closeprob <= 0)
2658                                 usage();
2659                         break;
2660                 case 'd':
2661                         debug = 1;
2662                         break;
2663                 case 'f':
2664                         flush_enabled = 1;
2665                         break;
2666                 case 'h':
2667                         holebdy = getnum(optarg, &endp);
2668                         if (holebdy <= 0)
2669                                 usage();
2670                         break;
2671                 case 'j':
2672                         journal_replay = true;
2673                         break;
2674                 case 'k':
2675                         keep_on_success = 1;
2676                         break;
2677                 case 'l':
2678                         {
2679                                 int _num = getnum(optarg, &endp);
2680                                 if (_num <= 0)
2681                                         usage();
2682                                 maxfilelen = _num;
2683                         }
2684                         break;
2685                 case 'm':
2686                         monitorstart = getnum(optarg, &endp);
2687                         if (monitorstart < 0)
2688                                 usage();
2689                         if (!endp || *endp++ != ':')
2690                                 usage();
2691                         monitorend = getnum(endp, &endp);
2692                         if (monitorend < 0)
2693                                 usage();
2694                         if (monitorend == 0)
2695                                 monitorend = -1; /* aka infinity */
2696                         debug = 1;
2697                         break;
2698                 case 'n':
2699                         sizechecks = 0;
2700                         break;
2701                 case 'o':
2702                         maxoplen = getnum(optarg, &endp);
2703                         if (maxoplen <= 0)
2704                                 usage();
2705                         break;
2706                 case 'p':
2707                         progressinterval = getnum(optarg, &endp);
2708                         if (progressinterval == 0)
2709                                 usage();
2710                         break;
2711                 case 'q':
2712                         quiet = 1;
2713                         break;
2714                 case 'r':
2715                         readbdy = getnum(optarg, &endp);
2716                         if (readbdy <= 0)
2717                                 usage();
2718                         break;
2719                 case 's':
2720                         style = getnum(optarg, &endp);
2721                         if (style < 0 || style > 1)
2722                                 usage();
2723                         break;
2724                 case 't':
2725                         truncbdy = getnum(optarg, &endp);
2726                         if (truncbdy <= 0)
2727                                 usage();
2728                         break;
2729                 case 'w':
2730                         writebdy = getnum(optarg, &endp);
2731                         if (writebdy <= 0)
2732                                 usage();
2733                         break;
2734                 case 'x':
2735                         prealloc = 1;
2736                         break;
2737                 case 'y':
2738                         do_fsync = 1;
2739                         break;
2740                 case 'C':
2741                         clone_calls = 0;
2742                         break;
2743                 case 'D':
2744                         debugstart = getnum(optarg, &endp);
2745                         if (debugstart < 1)
2746                                 usage();
2747                         break;
2748                 case 'F':
2749                         fallocate_calls = 0;
2750                         break;
2751                 case 'H':
2752                         punch_hole_calls = 0;
2753                         break;
2754                 case 'K':
2755                         prt("krbd mode enabled\n");
2756                         ops = &krbd_operations;
2757                         break;
2758                 case 'M':
2759                         prt("rbd-nbd mode enabled\n");
2760                         ops = &nbd_operations;
2761                         break;
2762                 case 'L':
2763                         prt("lite mode not supported for rbd\n");
2764                         exit(1);
2765                         break;
2766                 case 'N':
2767                         numops = getnum(optarg, &endp);
2768                         if (numops < 0)
2769                                 usage();
2770                         break;
2771                 case 'O':
2772                         randomoplen = 0;
2773                         break;
2774                 case 'P':
2775                         strncpy(dirpath, optarg, sizeof(dirpath)-1);
2776                         dirpath[sizeof(dirpath)-1] = '\0';
2777                         strncpy(goodfile, dirpath, sizeof(goodfile)-1);
2778                         goodfile[sizeof(goodfile)-1] = '\0';
2779                         if (strlen(goodfile) < sizeof(goodfile)-2) {
2780                                 strcat(goodfile, "/");
2781                         } else {
2782                                 prt("file name to long\n");
2783                                 exit(1);
2784                         }
2785                         strncpy(logfile, dirpath, sizeof(logfile)-1);
2786                         logfile[sizeof(logfile)-1] = '\0';
2787                         if (strlen(logfile) < sizeof(logfile)-2) {
2788                                 strcat(logfile, "/");
2789                         } else {
2790                                 prt("file path to long\n");
2791                                 exit(1);
2792                         }
2793                         break;
2794                 case 'R':
2795                         mapped_reads = 0;
2796                         if (!quiet)
2797                                 fprintf(stdout, "mapped reads DISABLED\n");
2798                         break;
2799                 case 'S':
2800                         seed = getnum(optarg, &endp);
2801                         if (seed == 0)
2802                                 seed = time(0) % 10000;
2803                         if (!quiet)
2804                                 fprintf(stdout, "Seed set to %d\n", seed);
2805                         if (seed < 0)
2806                                 usage();
2807                         break;
2808                 case 'U':
2809                         randomize_striping = 0;
2810                         break;
2811                 case 'W':
2812                         mapped_writes = 0;
2813                         if (!quiet)
2814                                 fprintf(stdout, "mapped writes DISABLED\n");
2815                         break;
2816                 case 'Z':
2817                         o_direct = O_DIRECT;
2818                         break;
2819                 default:
2820                         usage();
2821                         /* NOTREACHED */
2822                 }
2823         argc -= optind;
2824         argv += optind;
2825         if (argc != 2)
2826                 usage();
2827         pool = argv[0];
2828         iname = argv[1];
2829
2830         signal(SIGHUP,  cleanup);
2831         signal(SIGINT,  cleanup);
2832         signal(SIGPIPE, cleanup);
2833         signal(SIGALRM, cleanup);
2834         signal(SIGTERM, cleanup);
2835         signal(SIGXCPU, cleanup);
2836         signal(SIGXFSZ, cleanup);
2837         signal(SIGVTALRM,       cleanup);
2838         signal(SIGUSR1, cleanup);
2839         signal(SIGUSR2, cleanup);
2840
2841         random_generator.seed(seed);
2842
2843         ret = create_image();
2844         if (ret < 0) {
2845                 prterrcode(iname, ret);
2846                 exit(90);
2847         }
2848         ret = ops->open(iname, &ctx);
2849         if (ret < 0) {
2850                 simple_err("Error opening image", ret);
2851                 exit(91);
2852         }
2853         if (!dirpath[0])
2854                 strcat(dirpath, ".");
2855         strncat(goodfile, iname, 256);
2856         strcat (goodfile, ".fsxgood");
2857         fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
2858         if (fsxgoodfd < 0) {
2859                 prterr(goodfile);
2860                 exit(92);
2861         }
2862         strncat(logfile, iname, 256);
2863         strcat (logfile, ".fsxlog");
2864         fsxlogf = fopen(logfile, "w");
2865         if (fsxlogf == NULL) {
2866                 prterr(logfile);
2867                 exit(93);
2868         }
2869
2870         original_buf = (char *) malloc(maxfilelen);
2871         for (i = 0; i < (int)maxfilelen; i++)
2872                 original_buf[i] = get_random() % 256;
2873
2874         ret = posix_memalign((void **)&good_buf,
2875                              MAX(writebdy, (int)sizeof(void *)), maxfilelen);
2876         if (ret > 0) {
2877                 if (ret == EINVAL)
2878                         prt("writebdy is not a suitable power of two\n");
2879                 else
2880                         prterrcode("main: posix_memalign(good_buf)", -ret);
2881                 exit(94);
2882         }
2883         memset(good_buf, '\0', maxfilelen);
2884
2885         ret = posix_memalign((void **)&temp_buf,
2886                              MAX(readbdy, (int)sizeof(void *)), maxfilelen);
2887         if (ret > 0) {
2888                 if (ret == EINVAL)
2889                         prt("readbdy is not a suitable power of two\n");
2890                 else
2891                         prterrcode("main: posix_memalign(temp_buf)", -ret);
2892                 exit(95);
2893         }
2894         memset(temp_buf, '\0', maxfilelen);
2895
2896         if (lite) {     /* zero entire existing file */
2897                 ssize_t written;
2898
2899                 written = ops->write(&ctx, 0, (size_t)maxfilelen, good_buf);
2900                 if (written != (ssize_t)maxfilelen) {
2901                         if (written < 0) {
2902                                 prterrcode(iname, written);
2903                                 warn("main: error on write");
2904                         } else
2905                                 warn("main: short write, 0x%x bytes instead "
2906                                         "of 0x%lx\n",
2907                                         (unsigned)written,
2908                                         maxfilelen);
2909                         exit(98);
2910                 }
2911         } else
2912                 check_trunc_hack();
2913
2914         //test_fallocate();
2915
2916         while (numops == -1 || numops--)
2917                 test();
2918
2919         ret = ops->close(&ctx);
2920         if (ret < 0) {
2921                 prterrcode("ops->close", ret);
2922                 report_failure(99);
2923         }
2924
2925         if (journal_replay) {
2926                 char imagename[1024];
2927                 clone_imagename(imagename, sizeof(imagename), num_clones);
2928                 ret = finalize_journal(ioctx, imagename, num_clones, 0, 0, 0);
2929                 if (ret < 0) {
2930                         report_failure(100);
2931                 }
2932         }
2933
2934         if (num_clones > 0) {
2935                 if (journal_replay) {
2936                         check_clone(num_clones - 1, true);
2937                 }
2938                 check_clone(num_clones - 1, false);
2939         }
2940
2941         if (!keep_on_success) {
2942                 while (num_clones >= 0) {
2943                         static bool remove_snap = false;
2944
2945                         if (journal_replay) {
2946                                 char replayimagename[1024];
2947                                 replay_imagename(replayimagename,
2948                                                  sizeof(replayimagename),
2949                                                  num_clones);
2950                                 remove_image(ioctx, replayimagename,
2951                                              remove_snap,
2952                                              false);
2953                         }
2954
2955                         char clonename[128];
2956                         clone_imagename(clonename, 128, num_clones);
2957                         remove_image(ioctx, clonename, remove_snap,
2958                                      journal_replay);
2959
2960                         remove_snap = true;
2961                         num_clones--;
2962                 }
2963         }
2964
2965         prt("All operations completed A-OK!\n");
2966         fclose(fsxlogf);
2967
2968         rados_ioctx_destroy(ioctx);
2969         krbd_destroy(krbd);
2970         rados_shutdown(cluster);
2971
2972         free(original_buf);
2973         free(good_buf);
2974         free(temp_buf);
2975
2976         exit(0);
2977         return 0;
2978 }