1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "librbd/api/DiffIterate.h"
5 #include "librbd/ImageCtx.h"
6 #include "librbd/ImageState.h"
7 #include "librbd/ObjectMap.h"
8 #include "librbd/Utils.h"
9 #include "librbd/internal.h"
10 #include "include/rados/librados.hpp"
11 #include "include/interval_set.h"
12 #include "common/errno.h"
13 #include "common/Throttle.h"
14 #include "librados/snap_set_diff.h"
15 #include <boost/tuple/tuple.hpp>
20 #define dout_subsys ceph_subsys_rbd
22 #define dout_prefix *_dout << "librbd::DiffIterate: "
// Per-object diff classification. The fast-diff code in this file stores
// these values in a BitVector<2> and compares against them when deciding
// what to report for an object.
29 enum ObjectDiffState {
30 OBJECT_DIFF_STATE_NONE = 0,
31 OBJECT_DIFF_STATE_UPDATED = 1,
32 OBJECT_DIFF_STATE_HOLE = 2
// State shared by every per-object request of one diff run: the user
// callback, the snapshot range being compared, the extents collected from
// the parent image, and the throttle that tracks the in-flight requests.
// NOTE(review): the struct header and some field declarations are not
// visible in this excerpt.
36 DiffIterate<>::Callback callback;
39 uint64_t from_snap_id;
// Extents reported by the parent image's diff; consulted by
// compute_parent_overlap() when diffing from snap id 0.
41 interval_set<uint64_t> parent_diff;
42 OrderedThrottle throttle;
// Copies the arguments into the matching members and sizes the throttle
// from image_ctx.concurrent_management_ops.
45 DiffContext(I &image_ctx, DiffIterate<>::Callback callback,
46 void *callback_arg, bool _whole_object, uint64_t _from_snap_id,
47 uint64_t _end_snap_id)
48 : callback(callback), callback_arg(callback_arg),
49 whole_object(_whole_object), from_snap_id(_from_snap_id),
50 end_snap_id(_end_snap_id),
51 throttle(image_ctx.concurrent_management_ops, true) {
// Completion context for a single object: it issues an asynchronous
// list_snaps on the object, turns the returned snap set into a list of
// changed extents, and reports those extents through the callback held in
// the shared DiffContext.
55 class C_DiffObject : public Context {
// Captures the image's CephContext, the IoCtx used for the listing, the
// shared diff context, the object name, the offset added to every reported
// extent, and a copy of the object's extents. m_snap_ret starts at 0.
58 C_DiffObject(I &image_ctx, librados::IoCtx &head_ctx,
59 DiffContext &diff_context, const std::string &oid,
60 uint64_t offset, const std::vector<ObjectExtent> &object_extents)
61 : m_cct(image_ctx.cct), m_head_ctx(head_ctx),
62 m_diff_context(diff_context), m_oid(oid), m_offset(offset),
63 m_object_extents(object_extents), m_snap_ret(0) {
// Register this context with the throttle and wrap the context it hands
// back in a rados completion. (The enclosing method header is not visible
// in this excerpt.)
67 C_OrderedThrottle *ctx = m_diff_context.throttle.start_op(this);
68 librados::AioCompletion *rados_completion =
69 util::create_rados_callback(ctx);
// The read op fills m_snap_set and stores the per-op result in m_snap_ret.
71 librados::ObjectReadOperation op;
72 op.list_snaps(&m_snap_set, &m_snap_ret);
74 int r = m_head_ctx.aio_operate(m_oid, rados_completion, &op, NULL);
// Drop the local reference to the completion once the op has been submitted.
76 rados_completion->release();
// One reported extent: the three fields are passed, in order, as the first
// three arguments of the diff callback (see finish()).
80 typedef boost::tuple<uint64_t, size_t, bool> Diff;
81 typedef std::list<Diff> Diffs;
// Completion handler. On success the snap set is turned into diffs; on
// -ENOENT only the parent overlap is considered; any other error is logged.
// The collected diffs are handed to the callback and the final result is
// passed to the throttle via end_op().
83 void finish(int r) override {
84 CephContext *cct = m_cct;
// NOTE(review): the body of this branch is not visible here; presumably it
// promotes the per-op error in m_snap_ret to r -- confirm.
85 if (r == 0 && m_snap_ret < 0) {
91 ldout(cct, 20) << "object " << m_oid << ": list_snaps complete" << dendl;
92 compute_diffs(&diffs);
93 } else if (r == -ENOENT) {
94 ldout(cct, 20) << "object " << m_oid << ": list_snaps (not found)"
97 compute_parent_overlap(&diffs);
99 ldout(cct, 20) << "object " << m_oid << ": list_snaps failed: "
100 << cpp_strerror(r) << dendl;
// Report each collected extent; r receives the callback's return value.
104 for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) {
105 r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(),
106 m_diff_context.callback_arg);
112 m_diff_context.throttle.end_op(r);
117 librados::IoCtx &m_head_ctx;
118 DiffContext &m_diff_context;
121 std::vector<ObjectExtent> m_object_extents;
// Output of the list_snaps operation.
123 librados::snap_set_t m_snap_set;
// Converts m_snap_set into diff entries appended to *diffs. Three cases:
//  - diffing from snap id 0 and the object does not exist at the end snap:
//    fall back to the parent overlap;
//  - whole_object mode: push the full object extents;
//  - otherwise: intersect each buffer extent with the computed diff and push
//    the overlapping pieces translated to logical offsets.
126 void compute_diffs(Diffs *diffs) {
127 CephContext *cct = m_cct;
129 // calc diff from from_snap_id -> to_snap_id
130 interval_set<uint64_t> diff;
133 librados::snap_t clone_end_snap_id;
134 calc_snap_set_diff(cct, m_snap_set, m_diff_context.from_snap_id,
135 m_diff_context.end_snap_id, &diff, &end_size,
136 &end_exists, &clone_end_snap_id);
137 ldout(cct, 20) << " diff " << diff << " end_exists=" << end_exists
140 if (m_diff_context.from_snap_id == 0 && !end_exists) {
141 compute_parent_overlap(diffs);
144 } else if (m_diff_context.whole_object) {
145 // provide the full object extents to the callback
146 for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
147 q != m_object_extents.end(); ++q) {
148 diffs->push_back(boost::make_tuple(m_offset + q->offset, q->length,
154 for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
155 q != m_object_extents.end(); ++q) {
156 ldout(cct, 20) << "diff_iterate object " << m_oid << " extent "
157 << q->offset << "~" << q->length << " from "
158 << q->buffer_extents << dendl;
// opos is the position inside the object that corresponds to the start of
// the current buffer extent.
159 uint64_t opos = q->offset;
160 for (vector<pair<uint64_t,uint64_t> >::iterator r =
161 q->buffer_extents.begin();
162 r != q->buffer_extents.end(); ++r) {
163 interval_set<uint64_t> overlap; // object extents
164 overlap.insert(opos, r->second);
165 overlap.intersection_of(diff);
166 ldout(cct, 20) << " opos " << opos
167 << " buf " << r->first << "~" << r->second
168 << " overlap " << overlap << dendl;
169 for (interval_set<uint64_t>::iterator s = overlap.begin();
170 s != overlap.end(); ++s) {
// Translate the object-relative overlap into a logical offset: distance from
// the start of this buffer extent, plus the buffer offset, plus m_offset.
171 uint64_t su_off = s.get_start() - opos;
172 uint64_t logical_off = m_offset + r->first + su_off;
173 ldout(cct, 20) << " overlap extent " << s.get_start() << "~"
174 << s.get_len() << " logical " << logical_off << "~"
175 << s.get_len() << dendl;
176 diffs->push_back(boost::make_tuple(logical_off, s.get_len(),
// Sanity check that the buffer extents covered the whole object extent.
// NOTE(review): the statement that advances opos is not visible in this
// excerpt; presumably it adds r->second per buffer extent -- confirm.
181 assert(opos == q->offset + q->length);
// Appends to *diffs the parts of this object's extents that intersect the
// parent diff. Does nothing unless diffing from snap id 0 with a non-empty
// parent_diff.
185 void compute_parent_overlap(Diffs *diffs) {
186 if (m_diff_context.from_snap_id == 0 &&
187 !m_diff_context.parent_diff.empty()) {
188 // report parent diff instead
189 for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
190 q != m_object_extents.end(); ++q) {
191 for (vector<pair<uint64_t,uint64_t> >::iterator r =
192 q->buffer_extents.begin();
193 r != q->buffer_extents.end(); ++r) {
// Shift the buffer extent by m_offset before intersecting it with
// parent_diff.
194 interval_set<uint64_t> o;
195 o.insert(m_offset + r->first, r->second);
196 o.intersection_of(m_diff_context.parent_diff);
197 ldout(m_cct, 20) << " reporting parent overlap " << o << dendl;
198 for (interval_set<uint64_t>::iterator s = o.begin(); s != o.end();
200 diffs->push_back(boost::make_tuple(s.get_start(), s.get_len(),
// Diff callback that records the reported extent [off, off + len) in the
// interval_set<uint64_t> passed through arg.
// NOTE(review): the comment below suggests the insert is guarded (likely on
// `exists`), but the guard and the return statement are not visible in this
// excerpt -- confirm.
209 int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
210 // it's possible for a discard to create a hole in the parent image -- ignore
212 interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg);
213 diff->insert(off, len);
218 } // anonymous namespace
// Entry point: prepares the image (owner lock, refresh if required, clipping
// of the requested length), then builds a DiffIterate command for the range
// and executes it. cb is invoked with (offset, length, int flag, arg) for
// each reported extent.
// NOTE(review): the error checks after each step and the final return are
// not visible in this excerpt.
220 template <typename I>
221 int DiffIterate<I>::diff_iterate(I *ictx,
222 const cls::rbd::SnapshotNamespace& from_snap_namespace,
223 const char *fromsnapname,
224 uint64_t off, uint64_t len,
225 bool include_parent, bool whole_object,
226 int (*cb)(uint64_t, size_t, int, void *),
229 ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
230 << " len = " << len << dendl;
232 // ensure previous writes are visible to listsnaps
234 RWLock::RLocker owner_locker(ictx->owner_lock);
238 int r = ictx->state->refresh_if_required();
// clip_io may adjust len; it is called with snap_lock held for read.
243 ictx->snap_lock.get_read();
244 r = clip_io(ictx, off, &len);
245 ictx->snap_lock.put_read();
250 DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len,
251 include_parent, whole_object, cb, arg);
252 r = command.execute();
// Runs the diff for [m_offset, m_offset + m_length):
//  1. resolves the from/end snapshot ids and image sizes under the image
//     locks;
//  2. when whole-object mode and the fast-diff feature are both on, tries to
//     build a per-object diff state from the object maps;
//  3. when diffing from snap id 0 with parents included, first collects the
//     parent's diff into diff_context.parent_diff;
//  4. walks the range and, per object, either answers from the fast-diff
//     state or creates a C_DiffObject to list the object's snapshots.
// NOTE(review): the loop header, several branch bodies and the return
// statements are not visible in this excerpt.
256 template <typename I>
257 int DiffIterate<I>::execute() {
258 CephContext* cct = m_image_ctx.cct;
260 librados::IoCtx head_ctx;
261 librados::snap_t from_snap_id = 0;
262 librados::snap_t end_snap_id;
263 uint64_t from_size = 0;
// head_ctx is a private duplicate of the image's data IoCtx, so changing its
// read snapshot below does not touch m_image_ctx.data_ctx.
266 RWLock::RLocker md_locker(m_image_ctx.md_lock);
267 RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
268 head_ctx.dup(m_image_ctx.data_ctx);
// With no from-snapshot name, from_snap_id stays 0 and from_size stays 0.
269 if (m_from_snap_name) {
270 from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace, m_from_snap_name);
271 from_size = m_image_ctx.get_image_size(from_snap_id);
273 end_snap_id = m_image_ctx.snap_id;
274 end_size = m_image_ctx.get_image_size(end_snap_id);
// Validate the snapshot range; the handling of each case is not visible in
// this excerpt.
277 if (from_snap_id == CEPH_NOSNAP) {
280 if (from_snap_id == end_snap_id) {
284 if (from_snap_id >= end_snap_id) {
289 bool fast_diff_enabled = false;
290 BitVector<2> object_diff_state;
// Fast diff is attempted only in whole-object mode on images that have
// RBD_FEATURE_FAST_DIFF set.
292 RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
293 if (m_whole_object && (m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0) {
294 r = diff_object_map(from_snap_id, end_snap_id, &object_diff_state);
296 ldout(cct, 5) << "fast diff disabled" << dendl;
298 ldout(cct, 5) << "fast diff enabled" << dendl;
299 fast_diff_enabled = true;
304 // we must list snaps via the head, not end snap
305 head_ctx.snap_set_read(CEPH_SNAPDIR);
307 ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to "
308 << end_snap_id << " size from " << from_size
309 << " to " << end_size << dendl;
311 // check parent overlap only if we are comparing to the beginning of time
312 DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg,
313 m_whole_object, from_snap_id, end_snap_id);
314 if (m_include_parent && from_snap_id == 0) {
315 RWLock::RLocker l(m_image_ctx.snap_lock);
316 RWLock::RLocker l2(m_image_ctx.parent_lock);
317 uint64_t overlap = 0;
318 m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &overlap);
// Run a nested DiffIterate on the parent image; its results are gathered
// in diff_context.parent_diff.
320 if (m_image_ctx.parent && overlap > 0) {
321 ldout(cct, 10) << " first getting parent diff" << dendl;
322 DiffIterate diff_parent(*m_image_ctx.parent, {},
324 m_include_parent, m_whole_object,
326 &diff_context.parent_diff);
327 r = diff_parent.execute();
334 uint64_t period = m_image_ctx.get_stripe_period();
335 uint64_t off = m_offset;
336 uint64_t left = m_length;
// period_off is off rounded down to a stripe-period boundary; read_len is
// the distance from off to the end of that period, capped by what is left.
339 uint64_t period_off = off - (off % period);
340 uint64_t read_len = min(period_off + period - off, left);
// Map the range [off, off + read_len) to per-object extents.
343 map<object_t,vector<ObjectExtent> > object_extents;
344 Striper::file_to_extents(cct, m_image_ctx.format_string,
345 &m_image_ctx.layout, off, read_len, 0,
348 // get snap info for each object
349 for (map<object_t,vector<ObjectExtent> >::iterator p =
350 object_extents.begin();
351 p != object_extents.end(); ++p) {
352 ldout(cct, 20) << "object " << p->first << dendl;
// Fast path: answer from the precomputed object diff state. The object
// number is taken from the first extent; objects in state NONE are not
// reported, and the flag passed to the callback is true only for UPDATED.
354 if (fast_diff_enabled) {
355 const uint64_t object_no = p->second.front().objectno;
356 if (object_diff_state[object_no] != OBJECT_DIFF_STATE_NONE) {
357 bool updated = (object_diff_state[object_no] ==
358 OBJECT_DIFF_STATE_UPDATED);
359 for (std::vector<ObjectExtent>::iterator q = p->second.begin();
360 q != p->second.end(); ++q) {
361 r = m_callback(off + q->offset, q->length, updated, m_callback_arg);
// Slow path: create a per-object request that lists the object's snapshots.
368 C_DiffObject *diff_object = new C_DiffObject(m_image_ctx, head_ctx,
// Pick up an error already recorded by the throttle.
374 if (diff_context.throttle.pending_error()) {
375 r = diff_context.throttle.wait_for_ret();
// Collect the throttle's final result.
385 r = diff_context.throttle.wait_for_ret();
// Fills *object_diff_state with a per-object OBJECT_DIFF_STATE_* value by
// loading object maps and comparing each one against the previously loaded
// map, moving from from_snap_id towards to_snap_id. The caller must hold
// m_image_ctx.snap_lock (asserted).
// NOTE(review): the loop header, the error returns and the final return are
// not visible in this excerpt.
392 template <typename I>
393 int DiffIterate<I>::diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id,
394 BitVector<2>* object_diff_state) {
395 assert(m_image_ctx.snap_lock.is_locked());
396 CephContext* cct = m_image_ctx.cct;
// A from_snap_id of 0 means "diff from the start": begin at
// m_image_ctx.snaps.back() when snapshots exist, otherwise at CEPH_NOSNAP.
398 bool diff_from_start = (from_snap_id == 0);
399 if (from_snap_id == 0) {
400 if (!m_image_ctx.snaps.empty()) {
401 from_snap_id = m_image_ctx.snaps.back();
403 from_snap_id = CEPH_NOSNAP;
407 object_diff_state->clear();
408 uint64_t current_snap_id = from_snap_id;
409 uint64_t next_snap_id = to_snap_id;
410 BitVector<2> prev_object_map;
411 bool prev_object_map_valid = false;
// Size of the image at the current snapshot (the image's own size when the
// current id is CEPH_NOSNAP).
413 uint64_t current_size = m_image_ctx.size;
414 if (current_snap_id != CEPH_NOSNAP) {
415 std::map<librados::snap_t, SnapInfo>::const_iterator snap_it =
416 m_image_ctx.snap_info.find(current_snap_id);
417 assert(snap_it != m_image_ctx.snap_info.end());
418 current_size = snap_it->second.size;
// NOTE(review): the statement that moves snap_it forward is not visible in
// this excerpt; presumably the iterator is advanced before this check.
421 if (snap_it != m_image_ctx.snap_info.end()) {
422 next_snap_id = snap_it->first;
424 next_snap_id = CEPH_NOSNAP;
// NOTE(review): flags are queried for from_snap_id, while the rest of the
// visible code tracks current_snap_id -- confirm this is intended.
429 int r = m_image_ctx.get_flags(from_snap_id, &flags);
431 lderr(cct) << "diff_object_map: failed to retrieve image flags" << dendl;
434 if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) {
435 ldout(cct, 1) << "diff_object_map: cannot perform fast diff on invalid "
436 << "object map" << dendl;
// Load the object map for this step from the image's metadata pool.
440 BitVector<2> object_map;
441 std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id,
443 r = cls_client::object_map_load(&m_image_ctx.md_ctx, oid, &object_map);
445 lderr(cct) << "diff_object_map: failed to load object map " << oid
449 ldout(cct, 20) << "diff_object_map: loaded object map " << oid << dendl;
// The loaded map must cover at least num_objs objects; it is then resized to
// exactly num_objs entries.
451 uint64_t num_objs = Striper::get_num_objects(m_image_ctx.layout,
453 if (object_map.size() < num_objs) {
454 ldout(cct, 1) << "diff_object_map: object map too small: "
455 << object_map.size() << " < " << num_objs << dendl;
458 object_map.resize(num_objs);
// Compare the objects present in both the previous and the current map:
//  - now nonexistent but previously not nonexistent -> HOLE
//  - now OBJECT_EXISTS, or the state changed in any way other than
//    OBJECT_EXISTS -> OBJECT_EXISTS_CLEAN -> UPDATED
460 uint64_t overlap = MIN(object_map.size(), prev_object_map.size());
461 for (uint64_t i = 0; i < overlap; ++i) {
462 ldout(cct, 20) << __func__ << ": object state: " << i << " "
463 << static_cast<uint32_t>(prev_object_map[i])
464 << "->" << static_cast<uint32_t>(object_map[i]) << dendl;
465 if (object_map[i] == OBJECT_NONEXISTENT) {
466 if (prev_object_map[i] != OBJECT_NONEXISTENT) {
467 (*object_diff_state)[i] = OBJECT_DIFF_STATE_HOLE;
469 } else if (object_map[i] == OBJECT_EXISTS ||
470 (prev_object_map[i] != object_map[i] &&
471 !(prev_object_map[i] == OBJECT_EXISTS &&
472 object_map[i] == OBJECT_EXISTS_CLEAN))) {
473 (*object_diff_state)[i] = OBJECT_DIFF_STATE_UPDATED;
476 ldout(cct, 20) << "diff_object_map: computed overlap diffs" << dendl;
// Objects beyond the end of the previous map are classified only when
// diffing from the start or when a previous map has been loaded:
// nonexistent -> NONE, anything else -> UPDATED.
478 object_diff_state->resize(object_map.size());
479 if (object_map.size() > prev_object_map.size() &&
480 (diff_from_start || prev_object_map_valid)) {
481 for (uint64_t i = overlap; i < object_diff_state->size(); ++i) {
482 ldout(cct, 20) << __func__ << ": object state: " << i << " "
483 << "->" << static_cast<uint32_t>(object_map[i]) << dendl;
484 if (object_map[i] == OBJECT_NONEXISTENT) {
485 (*object_diff_state)[i] = OBJECT_DIFF_STATE_NONE;
487 (*object_diff_state)[i] = OBJECT_DIFF_STATE_UPDATED;
491 ldout(cct, 20) << "diff_object_map: computed resize diffs" << dendl;
// Termination test: no further snapshot to move to, or the next one lies
// past to_snap_id (the branch body is not visible in this excerpt).
493 if (current_snap_id == next_snap_id || next_snap_id > to_snap_id) {
// Otherwise move on, keeping the map just processed as the baseline for the
// next comparison.
496 current_snap_id = next_snap_id;
497 prev_object_map = object_map;
498 prev_object_map_valid = true;
504 } // namespace librbd
506 template class librbd::api::DiffIterate<librbd::ImageCtx>;