X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Ftools%2Frbd_mirror%2Fimage_sync%2FObjectCopyRequest.cc;fp=src%2Fceph%2Fsrc%2Ftools%2Frbd_mirror%2Fimage_sync%2FObjectCopyRequest.cc;h=cc59030878ba35846c0182e6756aaed1c88755f4;hb=812ff6ca9fcd3e629e49d4328905f33eee8ca3f5;hp=0000000000000000000000000000000000000000;hpb=15280273faafb77777eab341909a3f495cf248d9;p=stor4nfv.git diff --git a/src/ceph/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc b/src/ceph/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc new file mode 100644 index 0000000..cc59030 --- /dev/null +++ b/src/ceph/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc @@ -0,0 +1,506 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ObjectCopyRequest.h" +#include "librados/snap_set_diff.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "common/errno.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_sync::ObjectCopyRequest: " \ + << this << " " << __func__ + +namespace librados { + +bool operator==(const clone_info_t& rhs, const clone_info_t& lhs) { + return (rhs.cloneid == lhs.cloneid && + rhs.snaps == lhs.snaps && + rhs.overlap == lhs.overlap && + rhs.size == lhs.size); +} + +bool operator==(const snap_set_t& rhs, const snap_set_t& lhs) { + return (rhs.clones == lhs.clones && + rhs.seq == lhs.seq); +} + +} // namespace librados + +namespace rbd { +namespace mirror { +namespace image_sync { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template +ObjectCopyRequest::ObjectCopyRequest(I *local_image_ctx, I *remote_image_ctx, + const SnapMap *snap_map, + uint64_t object_number, + Context *on_finish) + : m_local_image_ctx(local_image_ctx), m_remote_image_ctx(remote_image_ctx), + m_snap_map(snap_map), m_object_number(object_number), + m_on_finish(on_finish) { + assert(!snap_map->empty()); + + m_local_io_ctx.dup(m_local_image_ctx->data_ctx); + m_local_oid = m_local_image_ctx->get_object_name(object_number); + + m_remote_io_ctx.dup(m_remote_image_ctx->data_ctx); + m_remote_oid = m_remote_image_ctx->get_object_name(object_number); + + dout(20) << ": " + << "remote_oid=" << m_remote_oid << ", " + << "local_oid=" << m_local_oid << dendl; +} + +template +void ObjectCopyRequest::send() { + send_list_snaps(); +} + +template +void ObjectCopyRequest::send_list_snaps() { + dout(20) << dendl; + + librados::AioCompletion *rados_completion = create_rados_callback< + ObjectCopyRequest, &ObjectCopyRequest::handle_list_snaps>(this); + + librados::ObjectReadOperation op; + m_snap_set = {}; + m_snap_ret = 0; + op.list_snaps(&m_snap_set, &m_snap_ret); + + m_remote_io_ctx.snap_set_read(CEPH_SNAPDIR); + int r = m_remote_io_ctx.aio_operate(m_remote_oid, rados_completion, &op, + nullptr); + assert(r == 0); + rados_completion->release(); +} + +template +void ObjectCopyRequest::handle_list_snaps(int r) { + if (r == 0 && m_snap_ret < 0) { + r = m_snap_ret; + } + + dout(20) << ": r=" << r << dendl; + + if (r == -ENOENT) { + finish(0); + return; + } + + if (r < 0) { + derr << ": failed to list snaps: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (m_retry_missing_read) { + if (m_snap_set == m_retry_snap_set) { + derr << ": read encountered missing object using up-to-date snap set" + << dendl; + finish(-ENOENT); + return; + } + + dout(20) << ": retrying using updated snap set" << dendl; + m_retry_missing_read = false; + m_retry_snap_set = {}; + } + + compute_diffs(); + send_read_object(); +} + +template +void ObjectCopyRequest::send_read_object() { + if (m_snap_sync_ops.empty()) { + // no more snapshot diffs to read from remote + finish(0); + return; + } + + // build the read request + auto &sync_ops = m_snap_sync_ops.begin()->second; + assert(!sync_ops.empty()); + + bool read_required = false; + librados::ObjectReadOperation op; + for (auto &sync_op : sync_ops) { + switch (sync_op.type) { + case SYNC_OP_TYPE_WRITE: + if (!read_required) { + // map the sync op start snap id back to the necessary read snap id + librados::snap_t remote_snap_seq = + m_snap_sync_ops.begin()->first.second; + m_remote_io_ctx.snap_set_read(remote_snap_seq); + + dout(20) << ": remote_snap_seq=" << remote_snap_seq << dendl; + read_required = true; + } + dout(20) << ": read op: " << sync_op.offset << "~" << sync_op.length + << dendl; + op.sparse_read(sync_op.offset, sync_op.length, &sync_op.extent_map, + &sync_op.out_bl, nullptr); + op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | + LIBRADOS_OP_FLAG_FADVISE_NOCACHE); + break; + default: + break; + } + } + + if (!read_required) { + // nothing written to this object for this snapshot (must be trunc/remove) + send_write_object(); + return; + } + + librados::AioCompletion *comp = create_rados_callback< + ObjectCopyRequest, &ObjectCopyRequest::handle_read_object>(this); + int r = m_remote_io_ctx.aio_operate(m_remote_oid, comp, &op, nullptr); + assert(r == 0); + comp->release(); +} + +template +void ObjectCopyRequest::handle_read_object(int r) { + dout(20) << ": r=" << r << dendl; + + if (r == -ENOENT) { + m_retry_snap_set = m_snap_set; + m_retry_missing_read = true; + + dout(5) << ": object missing potentially due to removed snapshot" << dendl; + send_list_snaps(); + return; + } + + if (r < 0) { + derr << ": failed to read from remote object: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + send_write_object(); +} + +template +void ObjectCopyRequest::send_write_object() { + // retrieve the local snap context for the op + SnapIds local_snap_ids; + librados::snap_t local_snap_seq = 0; + librados::snap_t remote_snap_seq = m_snap_sync_ops.begin()->first.first; + if (remote_snap_seq != 0) { + auto snap_map_it = m_snap_map->find(remote_snap_seq); + assert(snap_map_it != m_snap_map->end()); + + // write snapshot context should be before actual snapshot + if (snap_map_it != m_snap_map->begin()) { + --snap_map_it; + assert(!snap_map_it->second.empty()); + local_snap_seq = snap_map_it->second.front(); + local_snap_ids = snap_map_it->second; + } + } + + Context *finish_op_ctx; + { + RWLock::RLocker owner_locker(m_local_image_ctx->owner_lock); + finish_op_ctx = start_local_op(m_local_image_ctx->owner_lock); + } + if (finish_op_ctx == nullptr) { + derr << ": lost exclusive lock" << dendl; + finish(-EROFS); + return; + } + + dout(20) << ": " + << "local_snap_seq=" << local_snap_seq << ", " + << "local_snaps=" << local_snap_ids << dendl; + + auto &sync_ops = m_snap_sync_ops.begin()->second; + assert(!sync_ops.empty()); + uint64_t object_offset; + uint64_t buffer_offset; + librados::ObjectWriteOperation op; + for (auto &sync_op : sync_ops) { + switch (sync_op.type) { + case SYNC_OP_TYPE_WRITE: + object_offset = sync_op.offset; + buffer_offset = 0; + for (auto it : sync_op.extent_map) { + if (object_offset < it.first) { + dout(20) << ": zero op: " << object_offset << "~" + << it.first - object_offset << dendl; + op.zero(object_offset, it.first - object_offset); + } + dout(20) << ": write op: " << it.first << "~" << it.second << dendl; + bufferlist tmpbl; + tmpbl.substr_of(sync_op.out_bl, buffer_offset, it.second); + op.write(it.first, tmpbl); + op.set_op_flags2(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | + LIBRADOS_OP_FLAG_FADVISE_NOCACHE); + buffer_offset += it.second; + object_offset = it.first + it.second; + } + if (object_offset < sync_op.offset + sync_op.length) { + uint64_t sync_op_end = sync_op.offset + sync_op.length; + assert(sync_op_end <= m_snap_object_sizes[remote_snap_seq]); + if (sync_op_end == m_snap_object_sizes[remote_snap_seq]) { + dout(20) << ": trunc op: " << object_offset << dendl; + op.truncate(object_offset); + m_snap_object_sizes[remote_snap_seq] = object_offset; + } else { + dout(20) << ": zero op: " << object_offset << "~" + << sync_op_end - object_offset << dendl; + op.zero(object_offset, sync_op_end - object_offset); + } + } + break; + case SYNC_OP_TYPE_TRUNC: + if (sync_op.offset > m_snap_object_sizes[remote_snap_seq]) { + // skip (must have been updated in WRITE op case issuing trunc op) + break; + } + dout(20) << ": trunc op: " << sync_op.offset << dendl; + op.truncate(sync_op.offset); + break; + case SYNC_OP_TYPE_REMOVE: + dout(20) << ": remove op" << dendl; + op.remove(); + break; + default: + assert(false); + } + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_write_object(r); + finish_op_ctx->complete(0); + }); + librados::AioCompletion *comp = create_rados_callback(ctx); + int r = m_local_io_ctx.aio_operate(m_local_oid, comp, &op, local_snap_seq, + local_snap_ids); + assert(r == 0); + comp->release(); +} + +template +void ObjectCopyRequest::handle_write_object(int r) { + dout(20) << ": r=" << r << dendl; + + if (r == -ENOENT) { + r = 0; + } + if (r < 0) { + derr << ": failed to write to local object: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + m_snap_sync_ops.erase(m_snap_sync_ops.begin()); + if (!m_snap_sync_ops.empty()) { + send_read_object(); + return; + } + + send_update_object_map(); +} + +template +void ObjectCopyRequest::send_update_object_map() { + m_local_image_ctx->owner_lock.get_read(); + m_local_image_ctx->snap_lock.get_read(); + if (!m_local_image_ctx->test_features(RBD_FEATURE_OBJECT_MAP, + m_local_image_ctx->snap_lock) || + m_snap_object_states.empty()) { + m_local_image_ctx->snap_lock.put_read(); + m_local_image_ctx->owner_lock.put_read(); + finish(0); + return; + } else if (m_local_image_ctx->object_map == nullptr) { + // possible that exclusive lock was lost in background + derr << ": object map is not initialized" << dendl; + + m_local_image_ctx->snap_lock.put_read(); + m_local_image_ctx->owner_lock.put_read(); + finish(-EINVAL); + return; + } + + assert(m_local_image_ctx->object_map != nullptr); + + auto snap_object_state = *m_snap_object_states.begin(); + m_snap_object_states.erase(m_snap_object_states.begin()); + + dout(20) << ": " + << "local_snap_id=" << snap_object_state.first << ", " + << "object_state=" << static_cast(snap_object_state.second) + << dendl; + + auto finish_op_ctx = start_local_op(m_local_image_ctx->owner_lock); + if (finish_op_ctx == nullptr) { + derr << ": lost exclusive lock" << dendl; + m_local_image_ctx->snap_lock.put_read(); + m_local_image_ctx->owner_lock.put_read(); + finish(-EROFS); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_update_object_map(r); + finish_op_ctx->complete(0); + }); + + RWLock::WLocker object_map_locker(m_local_image_ctx->object_map_lock); + bool sent = m_local_image_ctx->object_map->template aio_update< + Context, &Context::complete>( + snap_object_state.first, m_object_number, snap_object_state.second, {}, + {}, ctx); + assert(sent); + m_local_image_ctx->snap_lock.put_read(); + m_local_image_ctx->owner_lock.put_read(); +} + +template +void ObjectCopyRequest::handle_update_object_map(int r) { + dout(20) << ": r=" << r << dendl; + + assert(r == 0); + if (!m_snap_object_states.empty()) { + send_update_object_map(); + return; + } + finish(0); +} + +template +Context *ObjectCopyRequest::start_local_op(RWLock &owner_lock) { + assert(m_local_image_ctx->owner_lock.is_locked()); + if (m_local_image_ctx->exclusive_lock == nullptr) { + return nullptr; + } + return m_local_image_ctx->exclusive_lock->start_op(); +} + +template +void ObjectCopyRequest::compute_diffs() { + CephContext *cct = m_local_image_ctx->cct; + + m_snap_sync_ops = {}; + m_snap_object_states = {}; + m_snap_object_sizes = {}; + + librados::snap_t remote_sync_pont_snap_id = m_snap_map->rbegin()->first; + uint64_t prev_end_size = 0; + bool prev_exists = false; + librados::snap_t start_remote_snap_id = 0; + for (auto &pair : *m_snap_map) { + assert(!pair.second.empty()); + librados::snap_t end_remote_snap_id = pair.first; + librados::snap_t end_local_snap_id = pair.second.front(); + + interval_set diff; + uint64_t end_size; + bool exists; + librados::snap_t clone_end_snap_id; + calc_snap_set_diff(cct, m_snap_set, start_remote_snap_id, + end_remote_snap_id, &diff, &end_size, &exists, + &clone_end_snap_id); + + dout(20) << ": " + << "start_remote_snap=" << start_remote_snap_id << ", " + << "end_remote_snap_id=" << end_remote_snap_id << ", " + << "clone_end_snap_id=" << clone_end_snap_id << ", " + << "end_local_snap_id=" << end_local_snap_id << ", " + << "diff=" << diff << ", " + << "end_size=" << end_size << ", " + << "exists=" << exists << dendl; + if (exists) { + // clip diff to size of object (in case it was truncated) + if (end_size < prev_end_size) { + interval_set trunc; + trunc.insert(end_size, prev_end_size); + trunc.intersection_of(diff); + diff.subtract(trunc); + dout(20) << ": clearing truncate diff: " << trunc << dendl; + } + + // prepare the object map state + { + RWLock::RLocker snap_locker(m_local_image_ctx->snap_lock); + uint8_t object_state = OBJECT_EXISTS; + if (m_local_image_ctx->test_features(RBD_FEATURE_FAST_DIFF, + m_local_image_ctx->snap_lock) && + prev_exists && diff.empty() && end_size == prev_end_size) { + object_state = OBJECT_EXISTS_CLEAN; + } + m_snap_object_states[end_local_snap_id] = object_state; + } + + // reads should be issued against the newest (existing) snapshot within + // the associated snapshot object clone. writes should be issued + // against the oldest snapshot in the snap_map. + assert(clone_end_snap_id >= end_remote_snap_id); + if (clone_end_snap_id > remote_sync_pont_snap_id) { + // do not read past the sync point snapshot + clone_end_snap_id = remote_sync_pont_snap_id; + } + + // object write/zero, or truncate + // NOTE: a single snapshot clone might represent multiple snapshots, but + // the write/zero and truncate ops will only be associated with the first + // snapshot encountered within the clone since the diff will be empty for + // subsequent snapshots and the size will remain constant for a clone. + for (auto it = diff.begin(); it != diff.end(); ++it) { + dout(20) << ": read/write op: " << it.get_start() << "~" + << it.get_len() << dendl; + m_snap_sync_ops[{end_remote_snap_id, clone_end_snap_id}].emplace_back( + SYNC_OP_TYPE_WRITE, it.get_start(), it.get_len()); + } + if (end_size < prev_end_size) { + dout(20) << ": trunc op: " << end_size << dendl; + m_snap_sync_ops[{end_remote_snap_id, clone_end_snap_id}].emplace_back( + SYNC_OP_TYPE_TRUNC, end_size, 0U); + } + m_snap_object_sizes[end_remote_snap_id] = end_size; + } else { + if (prev_exists) { + // object remove + dout(20) << ": remove op" << dendl; + m_snap_sync_ops[{end_remote_snap_id, end_remote_snap_id}].emplace_back( + SYNC_OP_TYPE_REMOVE, 0U, 0U); + } + } + + prev_end_size = end_size; + prev_exists = exists; + start_remote_snap_id = end_remote_snap_id; + } +} + +template +void ObjectCopyRequest::finish(int r) { + dout(20) << ": r=" << r << dendl; + + // ensure IoCtxs are closed prior to proceeding + auto on_finish = m_on_finish; + delete this; + + on_finish->complete(r); +} + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_sync::ObjectCopyRequest;