X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Flibrbd%2Fio%2FCopyupRequest.cc;fp=src%2Fceph%2Fsrc%2Flibrbd%2Fio%2FCopyupRequest.cc;h=aee2f14d7b065ffac28fff1a7df608ce8d1db724;hb=812ff6ca9fcd3e629e49d4328905f33eee8ca3f5;hp=0000000000000000000000000000000000000000;hpb=15280273faafb77777eab341909a3f495cf248d9;p=stor4nfv.git diff --git a/src/ceph/src/librbd/io/CopyupRequest.cc b/src/ceph/src/librbd/io/CopyupRequest.cc new file mode 100644 index 0000000..aee2f14 --- /dev/null +++ b/src/ceph/src/librbd/io/CopyupRequest.cc @@ -0,0 +1,373 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/io/CopyupRequest.h" +#include "common/ceph_context.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/Mutex.h" + +#include "librbd/AsyncObjectThrottle.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ObjectMap.h" +#include "librbd/Utils.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ImageRequest.h" +#include "librbd/io/ObjectRequest.h" +#include "librbd/io/ReadResult.h" + +#include +#include +#include + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::io::CopyupRequest: " << this \ + << " " << __func__ << ": " + +namespace librbd { +namespace io { + +namespace { + +class UpdateObjectMap : public C_AsyncObjectThrottle<> { +public: + UpdateObjectMap(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx, + uint64_t object_no, const std::vector *snap_ids, + const ZTracer::Trace &trace, size_t snap_id_idx) + : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_no(object_no), + m_snap_ids(*snap_ids), m_trace(trace), m_snap_id_idx(snap_id_idx) + { + } + + int send() override { + uint64_t snap_id = m_snap_ids[m_snap_id_idx]; + if (snap_id == CEPH_NOSNAP) { + RWLock::RLocker snap_locker(m_image_ctx.snap_lock); + RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock); + assert(m_image_ctx.exclusive_lock->is_lock_owner()); + assert(m_image_ctx.object_map != nullptr); + bool sent = m_image_ctx.object_map->aio_update( + CEPH_NOSNAP, m_object_no, OBJECT_EXISTS, {}, m_trace, this); + return (sent ? 0 : 1); + } + + uint8_t state = OBJECT_EXISTS; + if (m_image_ctx.test_features(RBD_FEATURE_FAST_DIFF) && + m_snap_id_idx + 1 < m_snap_ids.size()) { + state = OBJECT_EXISTS_CLEAN; + } + + RWLock::RLocker snap_locker(m_image_ctx.snap_lock); + RWLock::RLocker object_map_locker(m_image_ctx.object_map_lock); + if (m_image_ctx.object_map == nullptr) { + return 1; + } + + bool sent = m_image_ctx.object_map->aio_update( + snap_id, m_object_no, state, {}, m_trace, this); + assert(sent); + return 0; + } + +private: + uint64_t m_object_no; + const std::vector &m_snap_ids; + const ZTracer::Trace &m_trace; + size_t m_snap_id_idx; +}; + +} // anonymous namespace + + +CopyupRequest::CopyupRequest(ImageCtx *ictx, const std::string &oid, + uint64_t objectno, Extents &&image_extents, + const ZTracer::Trace &parent_trace) + : m_ictx(ictx), m_oid(oid), m_object_no(objectno), + m_image_extents(image_extents), + m_trace(util::create_trace(*m_ictx, "copy-up", parent_trace)), + m_state(STATE_READ_FROM_PARENT) +{ + m_async_op.start_op(*m_ictx); +} + +CopyupRequest::~CopyupRequest() { + assert(m_pending_requests.empty()); + m_async_op.finish_op(); +} + +void CopyupRequest::append_request(ObjectRequest<> *req) { + ldout(m_ictx->cct, 20) << req << dendl; + m_pending_requests.push_back(req); +} + +void CopyupRequest::complete_requests(int r) { + while (!m_pending_requests.empty()) { + vector *>::iterator it = m_pending_requests.begin(); + ObjectRequest<> *req = *it; + ldout(m_ictx->cct, 20) << "completing request " << req << dendl; + req->complete(r); + m_pending_requests.erase(it); + } +} + +bool CopyupRequest::send_copyup() { + bool add_copyup_op = !m_copyup_data.is_zero(); + bool copy_on_read = m_pending_requests.empty(); + if (!add_copyup_op && copy_on_read) { + // copyup empty object to prevent future CoR attempts + m_copyup_data.clear(); + add_copyup_op = true; + } + + ldout(m_ictx->cct, 20) << "oid " << m_oid << dendl; + m_state = STATE_COPYUP; + + m_ictx->snap_lock.get_read(); + ::SnapContext snapc = m_ictx->snapc; + m_ictx->snap_lock.put_read(); + + std::vector snaps; + + if (!copy_on_read) { + m_pending_copyups++; + } + + int r; + if (copy_on_read || (!snapc.snaps.empty() && add_copyup_op)) { + assert(add_copyup_op); + add_copyup_op = false; + + librados::ObjectWriteOperation copyup_op; + copyup_op.exec("rbd", "copyup", m_copyup_data); + + // send only the copyup request with a blank snapshot context so that + // all snapshots are detected from the parent for this object. If + // this is a CoW request, a second request will be created for the + // actual modification. + m_pending_copyups++; + + ldout(m_ictx->cct, 20) << "copyup with empty snapshot context" << dendl; + librados::AioCompletion *comp = util::create_rados_callback(this); + + librados::Rados rados(m_ictx->data_ctx); + r = rados.ioctx_create2(m_ictx->data_ctx.get_id(), m_data_ctx); + assert(r == 0); + + r = m_data_ctx.aio_operate( + m_oid, comp, ©up_op, 0, snaps, + (m_trace.valid() ? m_trace.get_info() : nullptr)); + assert(r == 0); + comp->release(); + } + + if (!copy_on_read) { + librados::ObjectWriteOperation write_op; + if (add_copyup_op) { + // CoW did not need to handle existing snapshots + write_op.exec("rbd", "copyup", m_copyup_data); + } + + // merge all pending write ops into this single RADOS op + for (size_t i=0; i *req = m_pending_requests[i]; + ldout(m_ictx->cct, 20) << "add_copyup_ops " << req << dendl; + bool set_hints = (i == 0); + req->add_copyup_ops(&write_op, set_hints); + } + assert(write_op.size() != 0); + + snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); + librados::AioCompletion *comp = util::create_rados_callback(this); + r = m_ictx->data_ctx.aio_operate( + m_oid, comp, &write_op, snapc.seq, snaps, + (m_trace.valid() ? m_trace.get_info() : nullptr)); + assert(r == 0); + comp->release(); + } + return false; +} + +bool CopyupRequest::is_copyup_required() { + bool noop = true; + for (const ObjectRequest<> *req : m_pending_requests) { + if (!req->is_op_payload_empty()) { + noop = false; + break; + } + } + + return (m_copyup_data.is_zero() && noop); +} + +void CopyupRequest::send() +{ + m_state = STATE_READ_FROM_PARENT; + AioCompletion *comp = AioCompletion::create_and_start( + this, m_ictx, AIO_TYPE_READ); + + ldout(m_ictx->cct, 20) << "completion " << comp + << ", oid " << m_oid + << ", extents " << m_image_extents + << dendl; + ImageRequest<>::aio_read(m_ictx->parent, comp, std::move(m_image_extents), + ReadResult{&m_copyup_data}, 0, m_trace); +} + +void CopyupRequest::complete(int r) +{ + if (should_complete(r)) { + complete_requests(r); + delete this; + } +} + +bool CopyupRequest::should_complete(int r) +{ + CephContext *cct = m_ictx->cct; + ldout(cct, 20) << "oid " << m_oid + << ", r " << r << dendl; + + uint64_t pending_copyups; + switch (m_state) { + case STATE_READ_FROM_PARENT: + ldout(cct, 20) << "READ_FROM_PARENT" << dendl; + remove_from_list(); + if (r >= 0 || r == -ENOENT) { + if (is_copyup_required()) { + ldout(cct, 20) << "nop, skipping" << dendl; + return true; + } + + return send_object_map_head(); + } + break; + + case STATE_OBJECT_MAP_HEAD: + ldout(cct, 20) << "OBJECT_MAP_HEAD" << dendl; + assert(r == 0); + return send_object_map(); + + case STATE_OBJECT_MAP: + ldout(cct, 20) << "OBJECT_MAP" << dendl; + assert(r == 0); + return send_copyup(); + + case STATE_COPYUP: + // invoked via a finisher in librados, so thread safe + pending_copyups = --m_pending_copyups; + ldout(cct, 20) << "COPYUP (" << pending_copyups << " pending)" + << dendl; + if (r == -ENOENT) { + // hide the -ENOENT error if this is the last op + if (pending_copyups == 0) { + complete_requests(0); + } + } else if (r < 0) { + complete_requests(r); + } + return (pending_copyups == 0); + + default: + lderr(cct) << "invalid state: " << m_state << dendl; + assert(false); + break; + } + return (r < 0); +} + +void CopyupRequest::remove_from_list() +{ + Mutex::Locker l(m_ictx->copyup_list_lock); + + map::iterator it = + m_ictx->copyup_list.find(m_object_no); + assert(it != m_ictx->copyup_list.end()); + m_ictx->copyup_list.erase(it); +} + +bool CopyupRequest::send_object_map_head() { + CephContext *cct = m_ictx->cct; + ldout(cct, 20) << dendl; + + m_state = STATE_OBJECT_MAP_HEAD; + + { + RWLock::RLocker owner_locker(m_ictx->owner_lock); + RWLock::RLocker snap_locker(m_ictx->snap_lock); + if (m_ictx->object_map != nullptr) { + bool copy_on_read = m_pending_requests.empty(); + assert(m_ictx->exclusive_lock->is_lock_owner()); + + RWLock::WLocker object_map_locker(m_ictx->object_map_lock); + if (!m_ictx->snaps.empty()) { + m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(), + m_ictx->snaps.end()); + } + if (copy_on_read && + (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) { + m_snap_ids.insert(m_snap_ids.begin(), CEPH_NOSNAP); + object_map_locker.unlock(); + snap_locker.unlock(); + owner_locker.unlock(); + return send_object_map(); + } + + bool may_update = false; + uint8_t new_state, current_state; + + vector *>::reverse_iterator r_it = m_pending_requests.rbegin(); + for (; r_it != m_pending_requests.rend(); ++r_it) { + ObjectRequest<> *req = *r_it; + if (!req->pre_object_map_update(&new_state)) { + continue; + } + + current_state = (*m_ictx->object_map)[m_object_no]; + ldout(cct, 20) << req->get_op_type() << " object no " + << m_object_no << " current state " + << stringify(static_cast(current_state)) + << " new state " << stringify(static_cast(new_state)) + << dendl; + may_update = true; + break; + } + + if (may_update && (new_state != current_state) && + m_ictx->object_map->aio_update( + CEPH_NOSNAP, m_object_no, new_state, current_state, m_trace, + this)) { + return false; + } + } + } + + return send_object_map(); +} + +bool CopyupRequest::send_object_map() { + // avoid possible recursive lock attempts + if (m_snap_ids.empty()) { + // no object map update required + return send_copyup(); + } else { + // update object maps for HEAD and all existing snapshots + ldout(m_ictx->cct, 20) << "oid " << m_oid << dendl; + m_state = STATE_OBJECT_MAP; + + RWLock::RLocker owner_locker(m_ictx->owner_lock); + AsyncObjectThrottle<>::ContextFactory context_factory( + boost::lambda::bind(boost::lambda::new_ptr(), + boost::lambda::_1, m_ictx, m_object_no, &m_snap_ids, m_trace, + boost::lambda::_2)); + AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>( + NULL, *m_ictx, context_factory, util::create_context_callback(this), + NULL, 0, m_snap_ids.size()); + throttle->start_ops(m_ictx->concurrent_management_ops); + } + return false; +} + +} // namespace io +} // namespace librbd