1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "ObjectCopyRequest.h"
5 #include "librados/snap_set_diff.h"
6 #include "librbd/ExclusiveLock.h"
7 #include "librbd/ObjectMap.h"
8 #include "librbd/Utils.h"
9 #include "common/errno.h"
11 #define dout_context g_ceph_context
12 #define dout_subsys ceph_subsys_rbd_mirror
14 #define dout_prefix *_dout << "rbd::mirror::image_sync::ObjectCopyRequest: " \
15 << this << " " << __func__
// Field-wise equality for clone_info_t: two entries are equal only when
// cloneid, snaps, overlap and size all compare equal.
// NOTE(review): the parameter names look swapped (the first operand is named
// `rhs`, the second `lhs`). Every field is compared with ==, so the result
// should not depend on the naming, but it is confusing to read.
19 bool operator==(const clone_info_t& rhs, const clone_info_t& lhs) {
20 return (rhs.cloneid == lhs.cloneid &&
21 rhs.snaps == lhs.snaps &&
22 rhs.overlap == lhs.overlap &&
23 rhs.size == lhs.size);
// Equality for snap_set_t. The visible part compares `clones`; the expression
// ends in `&&`, so at least one further member is compared on lines that are
// not visible here.
// NOTE(review): same swapped `rhs`/`lhs` parameter naming as the
// clone_info_t overload.
26 bool operator==(const snap_set_t& rhs, const snap_set_t& lhs) {
27 return (rhs.clones == lhs.clones &&
31 } // namespace librados
35 namespace image_sync {
37 using librbd::util::create_context_callback;
38 using librbd::util::create_rados_callback;
// Constructs a copy request for a single object of an image pair.
//
// local_image_ctx / remote_image_ctx: destination and source image contexts.
// snap_map: stored as m_snap_map; asserted to be non-empty (and therefore
//   must be non-null).
// object_number: index of the object; used to derive both object names.
// on_finish: stored in m_on_finish (its declaration is on a line that is not
//   visible here).
//
// Each image's data IoCtx is duplicated into a member IoCtx.
// NOTE(review): presumably so that the per-request snap_set_read() calls made
// later do not affect the image's own IoCtx -- confirm.
41 ObjectCopyRequest<I>::ObjectCopyRequest(I *local_image_ctx, I *remote_image_ctx,
42 const SnapMap *snap_map,
43 uint64_t object_number,
45 : m_local_image_ctx(local_image_ctx), m_remote_image_ctx(remote_image_ctx),
46 m_snap_map(snap_map), m_object_number(object_number),
47 m_on_finish(on_finish) {
48 assert(!snap_map->empty());
// local (destination) object handle
50 m_local_io_ctx.dup(m_local_image_ctx->data_ctx);
51 m_local_oid = m_local_image_ctx->get_object_name(object_number);
// remote (source) object handle
53 m_remote_io_ctx.dup(m_remote_image_ctx->data_ctx);
54 m_remote_oid = m_remote_image_ctx->get_object_name(object_number);
57 << "remote_oid=" << m_remote_oid << ", "
58 << "local_oid=" << m_local_oid << dendl;
// Starts the request.
// NOTE(review): the body is not visible here; presumably it kicks off the
// state machine via send_list_snaps() -- confirm.
62 void ObjectCopyRequest<I>::send() {
// Issues an asynchronous list_snaps operation against the remote object.
// The snapshot listing is written to m_snap_set and the per-op return code to
// m_snap_ret; completion is delivered to handle_list_snaps().
67 void ObjectCopyRequest<I>::send_list_snaps() {
70 librados::AioCompletion *rados_completion = create_rados_callback<
71 ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_list_snaps>(this);
73 librados::ObjectReadOperation op;
76 op.list_snaps(&m_snap_set, &m_snap_ret);
// the listing is read using the CEPH_SNAPDIR snap id as the read snapshot
78 m_remote_io_ctx.snap_set_read(CEPH_SNAPDIR);
79 int r = m_remote_io_ctx.aio_operate(m_remote_oid, rados_completion, &op,
82 rados_completion->release();
// Completion handler for send_list_snaps().
//
// r: result of the aio operation. When the operation as a whole succeeded but
//    the list_snaps op reported an error in m_snap_ret, that case is handled
//    by the first branch (its body is not visible here; presumably it
//    propagates m_snap_ret into r -- confirm).
//
// If a previous read hit a missing object (m_retry_missing_read), the freshly
// listed snap set is compared with the one saved at that time
// (m_retry_snap_set): an identical snap set is reported as an error, otherwise
// the retry state is cleared and processing continues with the updated set.
86 void ObjectCopyRequest<I>::handle_list_snaps(int r) {
87 if (r == 0 && m_snap_ret < 0) {
91 dout(20) << ": r=" << r << dendl;
99 derr << ": failed to list snaps: " << cpp_strerror(r) << dendl;
104 if (m_retry_missing_read) {
105 if (m_snap_set == m_retry_snap_set) {
106 derr << ": read encountered missing object using up-to-date snap set"
112 dout(20) << ": retrying using updated snap set" << dendl;
113 m_retry_missing_read = false;
114 m_retry_snap_set = {};
// Builds and issues one asynchronous read against the remote object for the
// first pending entry of m_snap_sync_ops.
//
// Every SYNC_OP_TYPE_WRITE op in that entry contributes a sparse_read whose
// extent map and data land in the op's own extent_map / out_bl. If the entry
// contains no WRITE op, no read is needed (see the `!read_required` branch).
// Completion is delivered to handle_read_object().
121 template <typename I>
122 void ObjectCopyRequest<I>::send_read_object() {
123 if (m_snap_sync_ops.empty()) {
124 // no more snapshot diffs to read from remote
129 // build the read request
130 auto &sync_ops = m_snap_sync_ops.begin()->second;
131 assert(!sync_ops.empty());
133 bool read_required = false;
134 librados::ObjectReadOperation op;
135 for (auto &sync_op : sync_ops) {
136 switch (sync_op.type) {
137 case SYNC_OP_TYPE_WRITE:
// select the read snapshot once, on the first WRITE op encountered
138 if (!read_required) {
139 // map the sync op start snap id back to the necessary read snap id
// (the map key is a pair; its second element is the snap id to read from)
140 librados::snap_t remote_snap_seq =
141 m_snap_sync_ops.begin()->first.second;
142 m_remote_io_ctx.snap_set_read(remote_snap_seq);
144 dout(20) << ": remote_snap_seq=" << remote_snap_seq << dendl;
145 read_required = true;
147 dout(20) << ": read op: " << sync_op.offset << "~" << sync_op.length
149 op.sparse_read(sync_op.offset, sync_op.length, &sync_op.extent_map,
150 &sync_op.out_bl, nullptr);
151 op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
152 LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
159 if (!read_required) {
160 // nothing written to this object for this snapshot (must be trunc/remove)
165 librados::AioCompletion *comp = create_rados_callback<
166 ObjectCopyRequest<I>, &ObjectCopyRequest<I>::handle_read_object>(this);
167 int r = m_remote_io_ctx.aio_operate(m_remote_oid, comp, &op, nullptr);
// Completion handler for send_read_object().
//
// r: result of the remote read.
//
// On the "object missing" path the current snap set is saved in
// m_retry_snap_set and m_retry_missing_read is set, which handle_list_snaps()
// later uses to decide whether a retry is meaningful. Other failures are
// logged as read errors.
// NOTE(review): the conditions guarding each path are not visible here;
// presumably the retry path is taken for r == -ENOENT -- confirm.
172 template <typename I>
173 void ObjectCopyRequest<I>::handle_read_object(int r) {
174 dout(20) << ": r=" << r << dendl;
177 m_retry_snap_set = m_snap_set;
178 m_retry_missing_read = true;
180 dout(5) << ": object missing potentially due to removed snapshot" << dendl;
186 derr << ": failed to read from remote object: " << cpp_strerror(r)
// Translates the first pending entry of m_snap_sync_ops into a single
// ObjectWriteOperation on the local object and submits it asynchronously.
//
// Steps, in order:
//   1. Resolve the local snapshot context (local_snap_seq / local_snap_ids)
//      from m_snap_map using the remote snap id stored in the map key.
//   2. Start a local op via start_local_op(); a null result is reported as a
//      lost exclusive lock.
//   3. Convert each sync op into write / zero / truncate / remove sub-ops.
//   4. Submit; handle_write_object() runs first on completion, then the
//      context returned by start_local_op() is completed.
195 template <typename I>
196 void ObjectCopyRequest<I>::send_write_object() {
197 // retrieve the local snap context for the op
198 SnapIds local_snap_ids;
199 librados::snap_t local_snap_seq = 0;
// first element of the map key; 0 leaves the snap context above empty
200 librados::snap_t remote_snap_seq = m_snap_sync_ops.begin()->first.first;
201 if (remote_snap_seq != 0) {
202 auto snap_map_it = m_snap_map->find(remote_snap_seq);
203 assert(snap_map_it != m_snap_map->end());
205 // write snapshot context should be before actual snapshot
206 if (snap_map_it != m_snap_map->begin()) {
208 assert(!snap_map_it->second.empty());
209 local_snap_seq = snap_map_it->second.front();
210 local_snap_ids = snap_map_it->second;
// start_local_op() asserts that owner_lock is held, hence the read locker
214 Context *finish_op_ctx;
216 RWLock::RLocker owner_locker(m_local_image_ctx->owner_lock);
217 finish_op_ctx = start_local_op(m_local_image_ctx->owner_lock);
219 if (finish_op_ctx == nullptr) {
220 derr << ": lost exclusive lock" << dendl;
226 << "local_snap_seq=" << local_snap_seq << ", "
227 << "local_snaps=" << local_snap_ids << dendl;
229 auto &sync_ops = m_snap_sync_ops.begin()->second;
230 assert(!sync_ops.empty());
// object_offset: next byte of the object range not yet covered by an op
// buffer_offset: read position inside sync_op.out_bl
// NOTE(review): the initialisation of buffer_offset and the declaration of
// tmpbl are not visible here -- confirm both happen per WRITE op.
231 uint64_t object_offset;
232 uint64_t buffer_offset;
233 librados::ObjectWriteOperation op;
234 for (auto &sync_op : sync_ops) {
235 switch (sync_op.type) {
236 case SYNC_OP_TYPE_WRITE:
237 object_offset = sync_op.offset;
// walk the extents returned by the sparse read: gaps between extents become
// zero ops, the extents themselves become write ops
// NOTE(review): `auto it` copies each (offset, length) pair; `const auto &`
// would avoid the copy.
239 for (auto it : sync_op.extent_map) {
240 if (object_offset < it.first) {
241 dout(20) << ": zero op: " << object_offset << "~"
242 << it.first - object_offset << dendl;
243 op.zero(object_offset, it.first - object_offset);
245 dout(20) << ": write op: " << it.first << "~" << it.second << dendl;
247 tmpbl.substr_of(sync_op.out_bl, buffer_offset, it.second);
248 op.write(it.first, tmpbl);
249 op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
250 LIBRADOS_OP_FLAG_FADVISE_NOCACHE);
251 buffer_offset += it.second;
252 object_offset = it.first + it.second;
// trailing hole after the last extent: if it reaches the recorded object
// size it is expressed as a truncate (and the recorded size is lowered),
// otherwise it is zeroed
254 if (object_offset < sync_op.offset + sync_op.length) {
255 uint64_t sync_op_end = sync_op.offset + sync_op.length;
256 assert(sync_op_end <= m_snap_object_sizes[remote_snap_seq]);
257 if (sync_op_end == m_snap_object_sizes[remote_snap_seq]) {
258 dout(20) << ": trunc op: " << object_offset << dendl;
259 op.truncate(object_offset);
260 m_snap_object_sizes[remote_snap_seq] = object_offset;
262 dout(20) << ": zero op: " << object_offset << "~"
263 << sync_op_end - object_offset << dendl;
264 op.zero(object_offset, sync_op_end - object_offset);
268 case SYNC_OP_TYPE_TRUNC:
269 if (sync_op.offset > m_snap_object_sizes[remote_snap_seq]) {
270 // skip (must have been updated in WRITE op case issuing trunc op)
273 dout(20) << ": trunc op: " << sync_op.offset << dendl;
274 op.truncate(sync_op.offset);
276 case SYNC_OP_TYPE_REMOVE:
277 dout(20) << ": remove op" << dendl;
// completion order matters: run our handler first, then complete the context
// obtained from start_local_op()
285 auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
286 handle_write_object(r);
287 finish_op_ctx->complete(0);
289 librados::AioCompletion *comp = create_rados_callback(ctx);
290 int r = m_local_io_ctx.aio_operate(m_local_oid, comp, &op, local_snap_seq,
// Completion handler for send_write_object().
//
// r: result of the local write; failures are logged as write errors.
//
// On the non-error path the entry that was just written is removed from
// m_snap_sync_ops. If more entries remain the `!empty()` branch runs (its
// body is not visible here); once all are consumed the request moves on to
// send_update_object_map().
296 template <typename I>
297 void ObjectCopyRequest<I>::handle_write_object(int r) {
298 dout(20) << ": r=" << r << dendl;
304 derr << ": failed to write to local object: " << cpp_strerror(r)
310 m_snap_sync_ops.erase(m_snap_sync_ops.begin());
311 if (!m_snap_sync_ops.empty()) {
316 send_update_object_map();
// Pops the first pending (local snap id, object state) pair from
// m_snap_object_states and submits an asynchronous object-map update for
// m_object_number.
//
// Nothing is updated when the local image lacks RBD_FEATURE_OBJECT_MAP or no
// states are pending. A null object_map, or a null context from
// start_local_op(), is logged as an error.
//
// Locking: owner_lock and snap_lock are taken for read manually (no RAII
// guard), so every exit path visible below releases them explicitly, in the
// reverse order of acquisition. object_map_lock is taken for write around the
// aio_update() call.
319 template <typename I>
320 void ObjectCopyRequest<I>::send_update_object_map() {
321 m_local_image_ctx->owner_lock.get_read();
322 m_local_image_ctx->snap_lock.get_read();
323 if (!m_local_image_ctx->test_features(RBD_FEATURE_OBJECT_MAP,
324 m_local_image_ctx->snap_lock) ||
325 m_snap_object_states.empty()) {
326 m_local_image_ctx->snap_lock.put_read();
327 m_local_image_ctx->owner_lock.put_read();
330 } else if (m_local_image_ctx->object_map == nullptr) {
331 // possible that exclusive lock was lost in background
332 derr << ": object map is not initialized" << dendl;
334 m_local_image_ctx->snap_lock.put_read();
335 m_local_image_ctx->owner_lock.put_read();
340 assert(m_local_image_ctx->object_map != nullptr);
// take a copy of the first pending state, then drop it from the queue
342 auto snap_object_state = *m_snap_object_states.begin();
343 m_snap_object_states.erase(m_snap_object_states.begin());
346 << "local_snap_id=" << snap_object_state.first << ", "
347 << "object_state=" << static_cast<uint32_t>(snap_object_state.second)
350 auto finish_op_ctx = start_local_op(m_local_image_ctx->owner_lock);
351 if (finish_op_ctx == nullptr) {
352 derr << ": lost exclusive lock" << dendl;
353 m_local_image_ctx->snap_lock.put_read();
354 m_local_image_ctx->owner_lock.put_read();
// run our handler first, then complete the context from start_local_op()
359 auto ctx = new FunctionContext([this, finish_op_ctx](int r) {
360 handle_update_object_map(r);
361 finish_op_ctx->complete(0);
// NOTE(review): how `sent` is used afterwards is not visible here -- confirm
// that an unsent update still results in `ctx` being completed or freed.
364 RWLock::WLocker object_map_locker(m_local_image_ctx->object_map_lock);
365 bool sent = m_local_image_ctx->object_map->template aio_update<
366 Context, &Context::complete>(
367 snap_object_state.first, m_object_number, snap_object_state.second, {},
370 m_local_image_ctx->snap_lock.put_read();
371 m_local_image_ctx->owner_lock.put_read();
// Completion handler for one object-map update.
//
// r: result of the update (logged at debug level 20).
//
// While more per-snapshot object states are pending, the next update is sent;
// each call to send_update_object_map() consumes one entry.
374 template <typename I>
375 void ObjectCopyRequest<I>::handle_update_object_map(int r) {
376 dout(20) << ": r=" << r << dendl;
379 if (!m_snap_object_states.empty()) {
380 send_update_object_map();
// Starts an operation under the local image's exclusive lock.
//
// Precondition: m_local_image_ctx->owner_lock is held (asserted).
// Returns the result of exclusive_lock->start_op() when an exclusive lock
// object exists. Callers in this file treat a null return as "lost exclusive
// lock" and complete the returned context when their async op finishes.
// NOTE(review): the `owner_lock` parameter is not referenced in the visible
// lines (the assert uses the member directly); the body of the
// `exclusive_lock == nullptr` branch is not visible here.
386 template <typename I>
387 Context *ObjectCopyRequest<I>::start_local_op(RWLock &owner_lock) {
388 assert(m_local_image_ctx->owner_lock.is_locked());
389 if (m_local_image_ctx->exclusive_lock == nullptr) {
392 return m_local_image_ctx->exclusive_lock->start_op();
// Rebuilds the per-snapshot work lists from m_snap_set and m_snap_map.
//
// Outputs (all reset at the start):
//   m_snap_sync_ops      - keyed by {write snap id, read snap id}; holds the
//                          WRITE / TRUNC / REMOVE ops for each snapshot
//                          interval
//   m_snap_object_states - object-map state to record per local snap id
//   m_snap_object_sizes  - object size at each remote snap id
//
// The loop walks m_snap_map in iteration order, diffing each snapshot against
// the previous one (start_remote_snap_id -> end_remote_snap_id) with
// calc_snap_set_diff().
395 template <typename I>
396 void ObjectCopyRequest<I>::compute_diffs() {
397 CephContext *cct = m_local_image_ctx->cct;
399 m_snap_sync_ops = {};
400 m_snap_object_states = {};
401 m_snap_object_sizes = {};
// key of the last snap_map entry; reads are clamped to it further below
// NOTE(review): "pont" in the variable name is presumably a typo for "point".
403 librados::snap_t remote_sync_pont_snap_id = m_snap_map->rbegin()->first;
404 uint64_t prev_end_size = 0;
405 bool prev_exists = false;
406 librados::snap_t start_remote_snap_id = 0;
407 for (auto &pair : *m_snap_map) {
408 assert(!pair.second.empty());
409 librados::snap_t end_remote_snap_id = pair.first;
410 librados::snap_t end_local_snap_id = pair.second.front();
// outputs of calc_snap_set_diff() for this interval (the declarations of
// end_size and exists are not visible here)
412 interval_set<uint64_t> diff;
415 librados::snap_t clone_end_snap_id;
416 calc_snap_set_diff(cct, m_snap_set, start_remote_snap_id,
417 end_remote_snap_id, &diff, &end_size, &exists,
421 << "start_remote_snap=" << start_remote_snap_id << ", "
422 << "end_remote_snap_id=" << end_remote_snap_id << ", "
423 << "clone_end_snap_id=" << clone_end_snap_id << ", "
424 << "end_local_snap_id=" << end_local_snap_id << ", "
425 << "diff=" << diff << ", "
426 << "end_size=" << end_size << ", "
427 << "exists=" << exists << dendl;
429 // clip diff to size of object (in case it was truncated)
// NOTE(review): if interval_set::insert() takes (start, length), the second
// argument here is an end offset rather than a length, so `trunc` would cover
// more than [end_size, prev_end_size). The following intersection_of(diff)
// limits it to extents actually present in diff -- confirm this is intended.
430 if (end_size < prev_end_size) {
431 interval_set<uint64_t> trunc;
432 trunc.insert(end_size, prev_end_size);
433 trunc.intersection_of(diff);
434 diff.subtract(trunc);
435 dout(20) << ": clearing truncate diff: " << trunc << dendl;
438 // prepare the object map state
// with fast-diff enabled, an object that already existed and is unchanged in
// both content and size since the previous snapshot is recorded as
// OBJECT_EXISTS_CLEAN instead of OBJECT_EXISTS
440 RWLock::RLocker snap_locker(m_local_image_ctx->snap_lock);
441 uint8_t object_state = OBJECT_EXISTS;
442 if (m_local_image_ctx->test_features(RBD_FEATURE_FAST_DIFF,
443 m_local_image_ctx->snap_lock) &&
444 prev_exists && diff.empty() && end_size == prev_end_size) {
445 object_state = OBJECT_EXISTS_CLEAN;
447 m_snap_object_states[end_local_snap_id] = object_state;
450 // reads should be issued against the newest (existing) snapshot within
451 // the associated snapshot object clone. writes should be issued
452 // against the oldest snapshot in the snap_map.
453 assert(clone_end_snap_id >= end_remote_snap_id);
454 if (clone_end_snap_id > remote_sync_pont_snap_id) {
455 // do not read past the sync point snapshot
456 clone_end_snap_id = remote_sync_pont_snap_id;
459 // object write/zero, or truncate
460 // NOTE: a single snapshot clone might represent multiple snapshots, but
461 // the write/zero and truncate ops will only be associated with the first
462 // snapshot encountered within the clone since the diff will be empty for
463 // subsequent snapshots and the size will remain constant for a clone.
464 for (auto it = diff.begin(); it != diff.end(); ++it) {
465 dout(20) << ": read/write op: " << it.get_start() << "~"
466 << it.get_len() << dendl;
467 m_snap_sync_ops[{end_remote_snap_id, clone_end_snap_id}].emplace_back(
468 SYNC_OP_TYPE_WRITE, it.get_start(), it.get_len());
// the object shrank relative to the previous snapshot: queue a truncate
470 if (end_size < prev_end_size) {
471 dout(20) << ": trunc op: " << end_size << dendl;
472 m_snap_sync_ops[{end_remote_snap_id, clone_end_snap_id}].emplace_back(
473 SYNC_OP_TYPE_TRUNC, end_size, 0U);
475 m_snap_object_sizes[end_remote_snap_id] = end_size;
// a REMOVE op is keyed with the same snap id for both write and read
// NOTE(review): the condition selecting this path is not visible here;
// presumably the object no longer exists at this snapshot -- confirm.
479 dout(20) << ": remove op" << dendl;
480 m_snap_sync_ops[{end_remote_snap_id, end_remote_snap_id}].emplace_back(
481 SYNC_OP_TYPE_REMOVE, 0U, 0U);
// carry this snapshot's results forward as the baseline for the next one
485 prev_end_size = end_size;
486 prev_exists = exists;
487 start_remote_snap_id = end_remote_snap_id;
// Terminates the request and reports `r` to the completion context supplied
// at construction.
//
// m_on_finish is copied into a local before being completed.
// NOTE(review): the statements between the copy and the complete() call are
// not visible here; presumably the request object is destroyed there, which
// would be why the member is not used directly -- confirm.
491 template <typename I>
492 void ObjectCopyRequest<I>::finish(int r) {
493 dout(20) << ": r=" << r << dendl;
495 // ensure IoCtxs are closed prior to proceeding
496 auto on_finish = m_on_finish;
499 on_finish->complete(r);
502 } // namespace image_sync
503 } // namespace mirror
506 template class rbd::mirror::image_sync::ObjectCopyRequest<librbd::ImageCtx>;