1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include <boost/assign/list_of.hpp>
7 #include "common/ceph_context.h"
8 #include "common/dout.h"
9 #include "common/errno.h"
10 #include "common/perf_counters.h"
11 #include "common/WorkQueue.h"
12 #include "common/Timer.h"
14 #include "librbd/AsyncRequest.h"
15 #include "librbd/ExclusiveLock.h"
16 #include "librbd/internal.h"
17 #include "librbd/ImageCtx.h"
18 #include "librbd/ImageState.h"
19 #include "librbd/ImageWatcher.h"
20 #include "librbd/Journal.h"
21 #include "librbd/LibrbdAdminSocketHook.h"
22 #include "librbd/ObjectMap.h"
23 #include "librbd/Operations.h"
24 #include "librbd/operation/ResizeRequest.h"
25 #include "librbd/Utils.h"
26 #include "librbd/LibrbdWriteback.h"
27 #include "librbd/exclusive_lock/AutomaticPolicy.h"
28 #include "librbd/exclusive_lock/StandardPolicy.h"
29 #include "librbd/io/AioCompletion.h"
30 #include "librbd/io/AsyncOperation.h"
31 #include "librbd/io/ImageRequestWQ.h"
32 #include "librbd/journal/StandardPolicy.h"
34 #include "osdc/Striper.h"
35 #include <boost/bind.hpp>
37 #define dout_subsys ceph_subsys_rbd
39 #define dout_prefix *_dout << "librbd::ImageCtx: "
47 using ceph::bufferlist;
48 using librados::snap_t;
49 using librados::IoCtx;
55 class ThreadPoolSingleton : public ThreadPool {
57 ContextWQ *op_work_queue;
59 explicit ThreadPoolSingleton(CephContext *cct)
60 : ThreadPool(cct, "librbd::thread_pool", "tp_librbd", 1,
62 op_work_queue(new ContextWQ("librbd::op_work_queue",
63 cct->_conf->get_val<int64_t>("rbd_op_thread_timeout"),
67 ~ThreadPoolSingleton() override {
68 op_work_queue->drain();
75 class SafeTimerSingleton : public SafeTimer {
79 explicit SafeTimerSingleton(CephContext *cct)
80 : SafeTimer(cct, lock, true),
81 lock("librbd::Journal::SafeTimerSingleton::lock") {
84 ~SafeTimerSingleton() {
85 Mutex::Locker locker(lock);
90 struct C_FlushCache : public Context {
94 C_FlushCache(ImageCtx *_image_ctx, Context *_on_safe)
95 : image_ctx(_image_ctx), on_safe(_on_safe) {
97 void finish(int r) override {
98 // successful cache flush indicates all IO is now safe
99 image_ctx->flush_cache(on_safe);
103 struct C_ShutDownCache : public Context {
107 C_ShutDownCache(ImageCtx *_image_ctx, Context *_on_finish)
108 : image_ctx(_image_ctx), on_finish(_on_finish) {
110 void finish(int r) override {
111 image_ctx->object_cacher->stop();
112 on_finish->complete(r);
116 struct C_InvalidateCache : public Context {
122 C_InvalidateCache(ImageCtx *_image_ctx, bool _purge_on_error,
123 bool _reentrant_safe, Context *_on_finish)
124 : image_ctx(_image_ctx), purge_on_error(_purge_on_error),
125 reentrant_safe(_reentrant_safe), on_finish(_on_finish) {
127 void finish(int r) override {
128 assert(image_ctx->cache_lock.is_locked());
129 CephContext *cct = image_ctx->cct;
131 if (r == -EBLACKLISTED) {
132 lderr(cct) << "Blacklisted during flush! Purging cache..." << dendl;
133 image_ctx->object_cacher->purge_set(image_ctx->object_set);
134 } else if (r != 0 && purge_on_error) {
135 lderr(cct) << "invalidate cache encountered error "
136 << cpp_strerror(r) << " !Purging cache..." << dendl;
137 image_ctx->object_cacher->purge_set(image_ctx->object_set);
139 lderr(cct) << "flush_cache returned " << r << dendl;
142 loff_t unclean = image_ctx->object_cacher->release_set(
143 image_ctx->object_set);
147 lderr(cct) << "could not release all objects from cache: "
148 << unclean << " bytes remain" << dendl;
154 if (reentrant_safe) {
155 on_finish->complete(r);
157 image_ctx->op_work_queue->queue(on_finish, r);
163 } // anonymous namespace
165 const string ImageCtx::METADATA_CONF_PREFIX = "conf_";
// Construct an image context for image `image_name`/`image_id` in the pool
// referenced by IoCtx `p`, optionally opened at snapshot `snap`; `ro`
// requests read-only mode.  Wires up the shared thread pool, the per-image
// IO work queue, and the exclusive-lock/journal policies.
// NOTE(review): this excerpt elides parts of the member-initializer list
// and constructor body (e.g. ro/snap handling, else-branch close); the
// visible lines are documented as-is — confirm against the full file.
ImageCtx::ImageCtx(const string &image_name, const string &image_id,
                   const char *snap, IoCtx& p, bool ro)
  : cct((CephContext*)p.cct()),
    snap_id(CEPH_NOSNAP),  // opened at HEAD until snap_set() runs
    flush_encountered(false),
    exclusive_locked(false),
    // lock names embed `this` so lockdep can distinguish image instances
    owner_lock(util::unique_lock_name("librbd::ImageCtx::owner_lock", this)),
    md_lock(util::unique_lock_name("librbd::ImageCtx::md_lock", this)),
    cache_lock(util::unique_lock_name("librbd::ImageCtx::cache_lock", this)),
    snap_lock(util::unique_lock_name("librbd::ImageCtx::snap_lock", this)),
    parent_lock(util::unique_lock_name("librbd::ImageCtx::parent_lock", this)),
    object_map_lock(util::unique_lock_name("librbd::ImageCtx::object_map_lock", this)),
    async_ops_lock(util::unique_lock_name("librbd::ImageCtx::async_ops_lock", this)),
    copyup_list_lock(util::unique_lock_name("librbd::ImageCtx::copyup_list_lock", this)),
    completed_reqs_lock(util::unique_lock_name("librbd::ImageCtx::completed_reqs_lock", this)),
    order(0), size(0), features(0),
    id(image_id), parent(NULL),
    stripe_unit(0), stripe_count(0), flags(0),
    object_cacher(NULL), writeback_handler(NULL), object_set(NULL),
    state(new ImageState<>(this)),
    operations(new Operations<>(*this)),
    exclusive_lock(nullptr), object_map(nullptr),
    io_work_queue(nullptr), op_work_queue(nullptr),
    trace_endpoint("librbd")
  // zero the v1 header struct before any field is read
  memset(&header, 0, sizeof(header));

  // all images share one process-wide thread pool / op work queue
  ThreadPool *thread_pool;
  get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
  io_work_queue = new io::ImageRequestWQ<>(
    this, "librbd::io_work_queue",
    cct->_conf->get_val<int64_t>("rbd_op_thread_timeout"),
  // pick the exclusive-lock acquisition policy from configuration
  if (cct->_conf->get_val<bool>("rbd_auto_exclusive_lock_until_manual_request")) {
    exclusive_lock_policy = new exclusive_lock::AutomaticPolicy(this);
    exclusive_lock_policy = new exclusive_lock::StandardPolicy(this);
  journal_policy = new journal::StandardPolicy<ImageCtx>(this);
// Destructor.  All higher-level state machines (watcher, exclusive lock,
// object map, journal, admin socket hook) must already have been shut down.
// NOTE(review): several destructor lines are elided in this excerpt
// (e.g. perf_stop, object_set cleanup, closing braces) — documented as-is.
ImageCtx::~ImageCtx() {
  // released by ImageState / close paths before destruction
  assert(image_watcher == NULL);
  assert(exclusive_lock == NULL);
  assert(object_map == NULL);
  assert(journal == NULL);
  assert(asok_hook == NULL);

  delete object_cacher;
  object_cacher = NULL;

  if (writeback_handler) {
    delete writeback_handler;
    writeback_handler = NULL;

  delete[] format_string;  // allocated by init_layout()

  // flush in-flight librados ops, then drain the per-image IO queue
  data_ctx.aio_flush();
  io_work_queue->drain();

  delete journal_policy;
  delete exclusive_lock_policy;
  delete io_work_queue;
// Post-construction initialization: install the admin-socket hook, build
// the perf/trace name, and — when caching is enabled — create the
// writeback handler, object cacher and readahead engine.
// NOTE(review): lines are elided in this excerpt (snap-name suffix branch,
// the `if (cache)` guard, writethrough assignment, ObjectCacher argument
// list) — comments cover only the visible code.
void ImageCtx::init() {
  assert(!header_oid.empty());
  assert(old_format || !id.empty());

  asok_hook = new LibrbdAdminSocketHook(this);

  // perf counter / tracing name: librbd-<id>-<pool>-<image>[@snap]
  string pname = string("librbd-") + id + string("-") +
    data_ctx.get_pool_name() + string("-") + name;
  if (!snap_name.empty()) {
  trace_endpoint.copy_name(pname);

  Mutex::Locker l(cache_lock);
  ldout(cct, 20) << "enabling caching..." << dendl;
  writeback_handler = new LibrbdWriteback(this, cache_lock);

  // writethrough-until-flush starts with max_dirty forced low; see
  // user_flushed() for the switch to writeback
  uint64_t init_max_dirty = cache_max_dirty;
  if (cache_writethrough_until_flush)
  ldout(cct, 20) << "Initial cache settings:"
                 << " size=" << cache_size
                 << " num_objects=" << 10
                 << " max_dirty=" << init_max_dirty
                 << " target_dirty=" << cache_target_dirty
                 << cache_max_dirty_age << dendl;

  object_cacher = new ObjectCacher(cct, pname, *writeback_handler, cache_lock,
                                   10, /* reset this in init */
                                   cache_block_writes_upfront);

  // size object cache appropriately
  uint64_t obj = cache_max_dirty_object;
  obj = MIN(2000, MAX(10, cache_size / 100 / sizeof(ObjectCacher::Object)));
  ldout(cct, 10) << " cache bytes " << cache_size
                 << " -> about " << obj << " objects" << dendl;
  object_cacher->set_max_objects(obj);

  object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0);
  object_set->return_enoent = true;  // missing objects surface as ENOENT
  object_cacher->start();

  readahead.set_trigger_requests(readahead_trigger_requests);
  readahead.set_max_readahead_size(readahead_max_bytes);
320 void ImageCtx::shutdown() {
321 delete image_watcher;
322 image_watcher = nullptr;
// Derive the file-striping layout and the object-name format string from
// the image's order / stripe settings; also primes readahead alignment.
// NOTE(review): lines are elided in this excerpt (stripe_count default,
// the old/new-format branch selecting between the two snprintf calls, the
// trailing dendl) — only one of the snprintf lines executes per format.
void ImageCtx::init_layout()
{
  // default to one object per stripe when striping is not configured
  if (stripe_unit == 0 || stripe_count == 0) {
    stripe_unit = 1ull << order;

  // readahead alignment hints, coarsest granularity first
  vector<uint64_t> alignments;
  alignments.push_back(stripe_count << order); // object set (in file striping terminology)
  alignments.push_back(stripe_unit * stripe_count); // stripe
  alignments.push_back(stripe_unit); // stripe unit
  readahead.set_alignments(alignments);

  layout = file_layout_t();
  layout.stripe_unit = stripe_unit;
  layout.stripe_count = stripe_count;
  layout.object_size = 1ull << order;
  layout.pool_id = data_ctx.get_id();  // FIXME: pool id overflow?

  // rebuild the printf-style object name template: v1 images use 12 hex
  // digits for the object number, v2 images use 16
  delete[] format_string;
  size_t len = object_prefix.length() + 16;
  format_string = new char[len];
  snprintf(format_string, len, "%s.%%012llx", object_prefix.c_str());
  snprintf(format_string, len, "%s.%%016llx", object_prefix.c_str());

  ldout(cct, 10) << "init_layout stripe_unit " << stripe_unit
                 << " stripe_count " << stripe_count
                 << " object_size " << layout.object_size
                 << " prefix " << object_prefix
                 << " format " << format_string
// Register this image's perf counters (reads/writes/discards/flushes,
// snapshot operations, readahead and cache invalidations) under `name`
// with the process-wide perf counters collection.
// NOTE(review): a few counter lines may be elided in this excerpt.
void ImageCtx::perf_start(string name) {
  PerfCountersBuilder plb(cct, name, l_librbd_first, l_librbd_last);

  // data-path counters
  plb.add_u64_counter(l_librbd_rd, "rd", "Reads");
  plb.add_u64_counter(l_librbd_rd_bytes, "rd_bytes", "Data size in reads");
  plb.add_time_avg(l_librbd_rd_latency, "rd_latency", "Latency of reads");
  plb.add_u64_counter(l_librbd_wr, "wr", "Writes");
  plb.add_u64_counter(l_librbd_wr_bytes, "wr_bytes", "Written data");
  plb.add_time_avg(l_librbd_wr_latency, "wr_latency", "Write latency");
  plb.add_u64_counter(l_librbd_discard, "discard", "Discards");
  plb.add_u64_counter(l_librbd_discard_bytes, "discard_bytes", "Discarded data");
  plb.add_time_avg(l_librbd_discard_latency, "discard_latency", "Discard latency");
  plb.add_u64_counter(l_librbd_flush, "flush", "Flushes");
  plb.add_u64_counter(l_librbd_aio_flush, "aio_flush", "Async flushes");
  plb.add_time_avg(l_librbd_aio_flush_latency, "aio_flush_latency", "Latency of async flushes");
  plb.add_u64_counter(l_librbd_ws, "ws", "WriteSames");
  plb.add_u64_counter(l_librbd_ws_bytes, "ws_bytes", "WriteSame data");
  plb.add_time_avg(l_librbd_ws_latency, "ws_latency", "WriteSame latency");
  plb.add_u64_counter(l_librbd_cmp, "cmp", "CompareAndWrites");
  plb.add_u64_counter(l_librbd_cmp_bytes, "cmp_bytes", "Data size in cmps");
  plb.add_time_avg(l_librbd_cmp_latency, "cmp_latency", "Latency of cmps");
  // maintenance-operation counters
  plb.add_u64_counter(l_librbd_snap_create, "snap_create", "Snap creations");
  plb.add_u64_counter(l_librbd_snap_remove, "snap_remove", "Snap removals");
  plb.add_u64_counter(l_librbd_snap_rollback, "snap_rollback", "Snap rollbacks");
  plb.add_u64_counter(l_librbd_snap_rename, "snap_rename", "Snap rename");
  plb.add_u64_counter(l_librbd_notify, "notify", "Updated header notifications");
  plb.add_u64_counter(l_librbd_resize, "resize", "Resizes");
  plb.add_u64_counter(l_librbd_readahead, "readahead", "Read ahead");
  plb.add_u64_counter(l_librbd_readahead_bytes, "readahead_bytes", "Data size in read ahead");
  plb.add_u64_counter(l_librbd_invalidate_cache, "invalidate_cache", "Cache invalidates");

  perfcounter = plb.create_perf_counters();
  cct->get_perfcounters_collection()->add(perfcounter);
399 void ImageCtx::perf_stop() {
401 cct->get_perfcounters_collection()->remove(perfcounter);
405 void ImageCtx::set_read_flag(unsigned flag) {
406 extra_read_flags |= flag;
409 int ImageCtx::get_read_flags(snap_t snap_id) {
410 int flags = librados::OPERATION_NOFLAG | extra_read_flags;
411 if (snap_id == LIBRADOS_SNAP_HEAD)
414 if (balance_snap_reads)
415 flags |= librados::OPERATION_BALANCE_READS;
416 else if (localize_snap_reads)
417 flags |= librados::OPERATION_LOCALIZE_READS;
421 int ImageCtx::snap_set(cls::rbd::SnapshotNamespace in_snap_namespace,
424 assert(snap_lock.is_wlocked());
425 snap_t in_snap_id = get_snap_id(in_snap_namespace, in_snap_name);
426 if (in_snap_id != CEPH_NOSNAP) {
427 snap_id = in_snap_id;
428 snap_namespace = in_snap_namespace;
429 snap_name = in_snap_name;
431 data_ctx.snap_set_read(snap_id);
437 void ImageCtx::snap_unset()
439 assert(snap_lock.is_wlocked());
440 snap_id = CEPH_NOSNAP;
444 data_ctx.snap_set_read(snap_id);
447 snap_t ImageCtx::get_snap_id(cls::rbd::SnapshotNamespace in_snap_namespace,
448 string in_snap_name) const
450 assert(snap_lock.is_locked());
451 auto it = snap_ids.find({in_snap_namespace, in_snap_name});
452 if (it != snap_ids.end())
457 const SnapInfo* ImageCtx::get_snap_info(snap_t in_snap_id) const
459 assert(snap_lock.is_locked());
460 map<snap_t, SnapInfo>::const_iterator it =
461 snap_info.find(in_snap_id);
462 if (it != snap_info.end())
467 int ImageCtx::get_snap_name(snap_t in_snap_id,
468 string *out_snap_name) const
470 assert(snap_lock.is_locked());
471 const SnapInfo *info = get_snap_info(in_snap_id);
473 *out_snap_name = info->name;
479 int ImageCtx::get_snap_namespace(snap_t in_snap_id,
480 cls::rbd::SnapshotNamespace *out_snap_namespace) const
482 assert(snap_lock.is_locked());
483 const SnapInfo *info = get_snap_info(in_snap_id);
485 *out_snap_namespace = info->snap_namespace;
491 int ImageCtx::get_parent_spec(snap_t in_snap_id,
492 ParentSpec *out_pspec) const
494 const SnapInfo *info = get_snap_info(in_snap_id);
496 *out_pspec = info->parent.spec;
502 uint64_t ImageCtx::get_current_size() const
504 assert(snap_lock.is_locked());
508 uint64_t ImageCtx::get_object_size() const
510 return 1ull << order;
513 string ImageCtx::get_object_name(uint64_t num) const {
514 char buf[object_prefix.length() + 32];
515 snprintf(buf, sizeof(buf), format_string, num);
519 uint64_t ImageCtx::get_stripe_unit() const
524 uint64_t ImageCtx::get_stripe_count() const
529 uint64_t ImageCtx::get_stripe_period() const
531 return stripe_count * (1ull << order);
534 utime_t ImageCtx::get_create_timestamp() const
536 return create_timestamp;
539 int ImageCtx::is_snap_protected(snap_t in_snap_id,
540 bool *is_protected) const
542 assert(snap_lock.is_locked());
543 const SnapInfo *info = get_snap_info(in_snap_id);
546 (info->protection_status == RBD_PROTECTION_STATUS_PROTECTED);
552 int ImageCtx::is_snap_unprotected(snap_t in_snap_id,
553 bool *is_unprotected) const
555 assert(snap_lock.is_locked());
556 const SnapInfo *info = get_snap_info(in_snap_id);
559 (info->protection_status == RBD_PROTECTION_STATUS_UNPROTECTED);
565 void ImageCtx::add_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
567 snap_t id, uint64_t in_size,
568 const ParentInfo &parent, uint8_t protection_status,
569 uint64_t flags, utime_t timestamp)
571 assert(snap_lock.is_wlocked());
573 SnapInfo info(in_snap_name, in_snap_namespace,
574 in_size, parent, protection_status, flags, timestamp);
575 snap_info.insert({id, info});
576 snap_ids.insert({{in_snap_namespace, in_snap_name}, id});
579 void ImageCtx::rm_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
583 assert(snap_lock.is_wlocked());
584 snaps.erase(std::remove(snaps.begin(), snaps.end(), id), snaps.end());
586 snap_ids.erase({in_snap_namespace, in_snap_name});
589 uint64_t ImageCtx::get_image_size(snap_t in_snap_id) const
591 assert(snap_lock.is_locked());
592 if (in_snap_id == CEPH_NOSNAP) {
593 if (!resize_reqs.empty() &&
594 resize_reqs.front()->shrinking()) {
595 return resize_reqs.front()->get_image_size();
600 const SnapInfo *info = get_snap_info(in_snap_id);
607 uint64_t ImageCtx::get_object_count(snap_t in_snap_id) const {
608 assert(snap_lock.is_locked());
609 uint64_t image_size = get_image_size(in_snap_id);
610 return Striper::get_num_objects(layout, image_size);
613 bool ImageCtx::test_features(uint64_t features) const
615 RWLock::RLocker l(snap_lock);
616 return test_features(features, snap_lock);
619 bool ImageCtx::test_features(uint64_t in_features,
620 const RWLock &in_snap_lock) const
622 assert(snap_lock.is_locked());
623 return ((features & in_features) == in_features);
626 int ImageCtx::get_flags(librados::snap_t _snap_id, uint64_t *_flags) const
628 assert(snap_lock.is_locked());
629 if (_snap_id == CEPH_NOSNAP) {
633 const SnapInfo *info = get_snap_info(_snap_id);
635 *_flags = info->flags;
641 int ImageCtx::test_flags(uint64_t flags, bool *flags_set) const
643 RWLock::RLocker l(snap_lock);
644 return test_flags(flags, snap_lock, flags_set);
647 int ImageCtx::test_flags(uint64_t flags, const RWLock &in_snap_lock,
648 bool *flags_set) const
650 assert(snap_lock.is_locked());
652 int r = get_flags(snap_id, &snap_flags);
656 *flags_set = ((snap_flags & flags) == flags);
660 int ImageCtx::update_flags(snap_t in_snap_id, uint64_t flag, bool enabled)
662 assert(snap_lock.is_wlocked());
664 if (in_snap_id == CEPH_NOSNAP) {
667 map<snap_t, SnapInfo>::iterator it = snap_info.find(in_snap_id);
668 if (it == snap_info.end()) {
671 _flags = &it->second.flags;
682 const ParentInfo* ImageCtx::get_parent_info(snap_t in_snap_id) const
684 assert(snap_lock.is_locked());
685 assert(parent_lock.is_locked());
686 if (in_snap_id == CEPH_NOSNAP)
688 const SnapInfo *info = get_snap_info(in_snap_id);
690 return &info->parent;
694 int64_t ImageCtx::get_parent_pool_id(snap_t in_snap_id) const
696 const ParentInfo *info = get_parent_info(in_snap_id);
698 return info->spec.pool_id;
702 string ImageCtx::get_parent_image_id(snap_t in_snap_id) const
704 const ParentInfo *info = get_parent_info(in_snap_id);
706 return info->spec.image_id;
710 uint64_t ImageCtx::get_parent_snap_id(snap_t in_snap_id) const
712 const ParentInfo *info = get_parent_info(in_snap_id);
714 return info->spec.snap_id;
718 int ImageCtx::get_parent_overlap(snap_t in_snap_id, uint64_t *overlap) const
720 assert(snap_lock.is_locked());
721 const ParentInfo *info = get_parent_info(in_snap_id);
723 *overlap = info->overlap;
729 void ImageCtx::aio_read_from_cache(object_t o, uint64_t object_no,
730 bufferlist *bl, size_t len,
731 uint64_t off, Context *onfinish,
732 int fadvise_flags, ZTracer::Trace *trace) {
733 snap_lock.get_read();
734 ObjectCacher::OSDRead *rd = object_cacher->prepare_read(snap_id, bl, fadvise_flags);
735 snap_lock.put_read();
736 ObjectExtent extent(o, object_no, off, len, 0);
737 extent.oloc.pool = data_ctx.get_id();
738 extent.buffer_extents.push_back(make_pair(0, len));
739 rd->extents.push_back(extent);
741 int r = object_cacher->readx(rd, object_set, onfinish, trace);
744 onfinish->complete(r);
747 void ImageCtx::write_to_cache(object_t o, const bufferlist& bl, size_t len,
748 uint64_t off, Context *onfinish,
749 int fadvise_flags, uint64_t journal_tid,
750 ZTracer::Trace *trace) {
751 snap_lock.get_read();
752 ObjectCacher::OSDWrite *wr = object_cacher->prepare_write(
753 snapc, bl, ceph::real_time::min(), fadvise_flags, journal_tid);
754 snap_lock.put_read();
755 ObjectExtent extent(o, 0, off, len, 0);
756 extent.oloc.pool = data_ctx.get_id();
757 // XXX: nspace is always default, io_ctx_impl field private
758 //extent.oloc.nspace = data_ctx.io_ctx_impl->oloc.nspace;
759 extent.buffer_extents.push_back(make_pair(0, len));
760 wr->extents.push_back(extent);
762 Mutex::Locker l(cache_lock);
763 object_cacher->writex(wr, object_set, onfinish, trace);
767 void ImageCtx::user_flushed() {
768 if (object_cacher && cache_writethrough_until_flush) {
770 bool flushed_before = flush_encountered;
773 uint64_t max_dirty = cache_max_dirty;
774 if (!flushed_before && max_dirty > 0) {
776 flush_encountered = true;
779 ldout(cct, 10) << "saw first user flush, enabling writeback" << dendl;
780 Mutex::Locker l(cache_lock);
781 object_cacher->set_max_dirty(max_dirty);
786 void ImageCtx::flush_cache(Context *onfinish) {
788 object_cacher->flush_set(object_set, onfinish);
792 void ImageCtx::shut_down_cache(Context *on_finish) {
793 if (object_cacher == NULL) {
794 on_finish->complete(0);
799 object_cacher->release_set(object_set);
802 C_ShutDownCache *shut_down = new C_ShutDownCache(this, on_finish);
803 flush_cache(new C_InvalidateCache(this, true, false, shut_down));
806 int ImageCtx::invalidate_cache(bool purge_on_error) {
807 flush_async_operations();
808 if (object_cacher == NULL) {
813 object_cacher->release_set(object_set);
817 flush_cache(new C_InvalidateCache(this, purge_on_error, true, &ctx));
819 int result = ctx.wait();
823 void ImageCtx::invalidate_cache(bool purge_on_error, Context *on_finish) {
824 if (object_cacher == NULL) {
825 op_work_queue->queue(on_finish, 0);
830 object_cacher->release_set(object_set);
833 flush_cache(new C_InvalidateCache(this, purge_on_error, false, on_finish));
836 void ImageCtx::clear_nonexistence_cache() {
837 assert(cache_lock.is_locked());
840 object_cacher->clear_nonexistence(object_set);
843 bool ImageCtx::is_cache_empty() {
844 Mutex::Locker locker(cache_lock);
845 return object_cacher->set_is_empty(object_set);
848 void ImageCtx::register_watch(Context *on_finish) {
849 assert(image_watcher == NULL);
850 image_watcher = new ImageWatcher<>(*this);
851 image_watcher->register_watch(on_finish);
854 uint64_t ImageCtx::prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx,
857 // drop extents completely beyond the overlap
858 while (!objectx.empty() && objectx.back().first >= overlap)
861 // trim final overlapping extent
862 if (!objectx.empty() && objectx.back().first + objectx.back().second > overlap)
863 objectx.back().second = overlap - objectx.back().first;
866 for (vector<pair<uint64_t,uint64_t> >::iterator p = objectx.begin();
870 ldout(cct, 10) << "prune_parent_extents image overlap " << overlap
871 << ", object overlap " << len
872 << " from image extents " << objectx << dendl;
876 void ImageCtx::flush_async_operations() {
878 flush_async_operations(&ctx);
882 void ImageCtx::flush_async_operations(Context *on_finish) {
884 Mutex::Locker l(async_ops_lock);
885 if (!async_ops.empty()) {
886 ldout(cct, 20) << "flush async operations: " << on_finish << " "
887 << "count=" << async_ops.size() << dendl;
888 async_ops.front()->add_flush_context(on_finish);
892 on_finish->complete(0);
895 int ImageCtx::flush() {
896 C_SaferCond cond_ctx;
898 return cond_ctx.wait();
901 void ImageCtx::flush(Context *on_safe) {
902 // ensure no locks are held when flush is complete
903 on_safe = util::create_async_context_callback(*this, on_safe);
905 if (object_cacher != NULL) {
906 // flush cache after completing all in-flight AIO ops
907 on_safe = new C_FlushCache(this, on_safe);
909 flush_async_operations(on_safe);
912 void ImageCtx::cancel_async_requests() {
914 cancel_async_requests(&ctx);
918 void ImageCtx::cancel_async_requests(Context *on_finish) {
920 Mutex::Locker async_ops_locker(async_ops_lock);
921 if (!async_requests.empty()) {
922 ldout(cct, 10) << "canceling async requests: count="
923 << async_requests.size() << dendl;
924 for (auto req : async_requests) {
925 ldout(cct, 10) << "canceling async request: " << req << dendl;
928 async_requests_waiters.push_back(on_finish);
933 on_finish->complete(0);
936 void ImageCtx::clear_pending_completions() {
937 Mutex::Locker l(completed_reqs_lock);
938 ldout(cct, 10) << "clear pending AioCompletion: count="
939 << completed_reqs.size() << dendl;
940 completed_reqs.clear();
943 bool ImageCtx::_filter_metadata_confs(const string &prefix,
944 map<string, bool> &configs,
945 const map<string, bufferlist> &pairs,
946 map<string, bufferlist> *res) {
947 size_t conf_prefix_len = prefix.size();
949 for (auto it : pairs) {
950 if (it.first.compare(0, MIN(conf_prefix_len, it.first.size()), prefix) > 0)
953 if (it.first.size() <= conf_prefix_len)
956 string key = it.first.substr(conf_prefix_len, it.first.size() - conf_prefix_len);
957 auto cit = configs.find(key);
958 if (cit != configs.end()) {
960 res->insert(make_pair(key, it.second));
// Apply per-image configuration overrides stored as image metadata
// ("conf_<option>" keys): filter the recognized rbd_* options out of
// `meta`, parse them into a scratch md_config_t, then assign each option
// from the override (if present) or the global config.
// NOTE(review): lines are elided in this excerpt (e.g. parts of the
// ASSIGN_OPTION macro body and trailing cleanup) — documented as-is.
void ImageCtx::apply_metadata(const std::map<std::string, bufferlist> &meta) {
  ldout(cct, 20) << __func__ << dendl;
  // recognized overridable options; the bool tracks "seen in metadata"
  std::map<string, bool> configs = boost::assign::map_list_of(
    "rbd_non_blocking_aio", false)(
    "rbd_cache_writethrough_until_flush", false)(
    "rbd_cache_size", false)(
    "rbd_cache_max_dirty", false)(
    "rbd_cache_target_dirty", false)(
    "rbd_cache_max_dirty_age", false)(
    "rbd_cache_max_dirty_object", false)(
    "rbd_cache_block_writes_upfront", false)(
    "rbd_concurrent_management_ops", false)(
    "rbd_balance_snap_reads", false)(
    "rbd_localize_snap_reads", false)(
    "rbd_balance_parent_reads", false)(
    "rbd_localize_parent_reads", false)(
    "rbd_readahead_trigger_requests", false)(
    "rbd_readahead_max_bytes", false)(
    "rbd_readahead_disable_after_bytes", false)(
    "rbd_clone_copy_on_read", false)(
    "rbd_blacklist_on_break_lock", false)(
    "rbd_blacklist_expire_seconds", false)(
    "rbd_request_timed_out_seconds", false)(
    "rbd_journal_order", false)(
    "rbd_journal_splay_width", false)(
    "rbd_journal_commit_age", false)(
    "rbd_journal_object_flush_interval", false)(
    "rbd_journal_object_flush_bytes", false)(
    "rbd_journal_object_flush_age", false)(
    "rbd_journal_pool", false)(
    "rbd_journal_max_payload_bytes", false)(
    "rbd_journal_max_concurrent_object_sets", false)(
    "rbd_mirroring_resync_after_disconnect", false)(
    "rbd_mirroring_replay_delay", false)(
    "rbd_skip_partial_discard", false);

  // scratch config holding only the per-image overrides
  md_config_t local_config_t;
  std::map<std::string, bufferlist> res;

  _filter_metadata_confs(METADATA_CONF_PREFIX, configs, meta, &res);
  for (auto it : res) {
    std::string val(it.second.c_str(), it.second.length());
    int j = local_config_t.set_val(it.first.c_str(), val);
    // invalid values are logged and ignored; the global default is used
    lderr(cct) << __func__ << " failed to set config " << it.first
               << " with value " << it.second.c_str() << ": " << j

// Assign member `config` from the metadata override when present,
// otherwise from the global configuration.
#define ASSIGN_OPTION(config, type)                                     \
    string key = "rbd_";                                                \
    key = key + #config;                                                \
    config = local_config_t.get_val<type>("rbd_"#config);               \
    config = cct->_conf->get_val<type>("rbd_"#config);                  \

  ASSIGN_OPTION(non_blocking_aio, bool);
  ASSIGN_OPTION(cache, bool);
  ASSIGN_OPTION(cache_writethrough_until_flush, bool);
  ASSIGN_OPTION(cache_size, int64_t);
  ASSIGN_OPTION(cache_max_dirty, int64_t);
  ASSIGN_OPTION(cache_target_dirty, int64_t);
  ASSIGN_OPTION(cache_max_dirty_age, double);
  ASSIGN_OPTION(cache_max_dirty_object, int64_t);
  ASSIGN_OPTION(cache_block_writes_upfront, bool);
  ASSIGN_OPTION(concurrent_management_ops, int64_t);
  ASSIGN_OPTION(balance_snap_reads, bool);
  ASSIGN_OPTION(localize_snap_reads, bool);
  ASSIGN_OPTION(balance_parent_reads, bool);
  ASSIGN_OPTION(localize_parent_reads, bool);
  ASSIGN_OPTION(readahead_trigger_requests, int64_t);
  ASSIGN_OPTION(readahead_max_bytes, int64_t);
  ASSIGN_OPTION(readahead_disable_after_bytes, int64_t);
  ASSIGN_OPTION(clone_copy_on_read, bool);
  ASSIGN_OPTION(blacklist_on_break_lock, bool);
  ASSIGN_OPTION(blacklist_expire_seconds, int64_t);
  ASSIGN_OPTION(request_timed_out_seconds, int64_t);
  ASSIGN_OPTION(enable_alloc_hint, bool);
  ASSIGN_OPTION(journal_order, uint64_t);
  ASSIGN_OPTION(journal_splay_width, uint64_t);
  ASSIGN_OPTION(journal_commit_age, double);
  ASSIGN_OPTION(journal_object_flush_interval, int64_t);
  ASSIGN_OPTION(journal_object_flush_bytes, int64_t);
  ASSIGN_OPTION(journal_object_flush_age, double);
  ASSIGN_OPTION(journal_pool, std::string);
  ASSIGN_OPTION(journal_max_payload_bytes, uint64_t);
  ASSIGN_OPTION(journal_max_concurrent_object_sets, int64_t);
  ASSIGN_OPTION(mirroring_resync_after_disconnect, bool);
  ASSIGN_OPTION(mirroring_replay_delay, int64_t);
  ASSIGN_OPTION(skip_partial_discard, bool);
  ASSIGN_OPTION(blkin_trace_all, bool);
1064 ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
1065 return new ExclusiveLock<ImageCtx>(*this);
1068 ObjectMap<ImageCtx> *ImageCtx::create_object_map(uint64_t snap_id) {
1069 return new ObjectMap<ImageCtx>(*this, snap_id);
1072 Journal<ImageCtx> *ImageCtx::create_journal() {
1073 return new Journal<ImageCtx>(*this);
1076 void ImageCtx::set_image_name(const std::string &image_name) {
1077 // update the name so rename can be invoked repeatedly
1078 RWLock::RLocker owner_locker(owner_lock);
1079 RWLock::WLocker snap_locker(snap_lock);
1082 header_oid = util::old_header_name(image_name);
1086 void ImageCtx::notify_update() {
1087 state->handle_update_notification();
1088 ImageWatcher<>::notify_header_update(md_ctx, header_oid);
1091 void ImageCtx::notify_update(Context *on_finish) {
1092 state->handle_update_notification();
1093 image_watcher->notify_header_update(on_finish);
1096 exclusive_lock::Policy *ImageCtx::get_exclusive_lock_policy() const {
1097 assert(owner_lock.is_locked());
1098 assert(exclusive_lock_policy != nullptr);
1099 return exclusive_lock_policy;
1102 void ImageCtx::set_exclusive_lock_policy(exclusive_lock::Policy *policy) {
1103 assert(owner_lock.is_wlocked());
1104 assert(policy != nullptr);
1105 delete exclusive_lock_policy;
1106 exclusive_lock_policy = policy;
1109 journal::Policy *ImageCtx::get_journal_policy() const {
1110 assert(snap_lock.is_locked());
1111 assert(journal_policy != nullptr);
1112 return journal_policy;
1115 void ImageCtx::set_journal_policy(journal::Policy *policy) {
1116 assert(snap_lock.is_wlocked());
1117 assert(policy != nullptr);
1118 delete journal_policy;
1119 journal_policy = policy;
1122 void ImageCtx::get_thread_pool_instance(CephContext *cct,
1123 ThreadPool **thread_pool,
1124 ContextWQ **op_work_queue) {
1125 ThreadPoolSingleton *thread_pool_singleton;
1126 cct->lookup_or_create_singleton_object<ThreadPoolSingleton>(
1127 thread_pool_singleton, "librbd::thread_pool");
1128 *thread_pool = thread_pool_singleton;
1129 *op_work_queue = thread_pool_singleton->op_work_queue;
1132 void ImageCtx::get_timer_instance(CephContext *cct, SafeTimer **timer,
1133 Mutex **timer_lock) {
1134 SafeTimerSingleton *safe_timer_singleton;
1135 cct->lookup_or_create_singleton_object<SafeTimerSingleton>(
1136 safe_timer_singleton, "librbd::journal::safe_timer");
1137 *timer = safe_timer_singleton;
1138 *timer_lock = &safe_timer_singleton->lock;