X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Flibrbd%2Fio%2FImageRequest.cc;fp=src%2Fceph%2Fsrc%2Flibrbd%2Fio%2FImageRequest.cc;h=434c208eefd1b56679988f9f1a1505251d063124;hb=812ff6ca9fcd3e629e49d4328905f33eee8ca3f5;hp=0000000000000000000000000000000000000000;hpb=15280273faafb77777eab341909a3f495cf248d9;p=stor4nfv.git diff --git a/src/ceph/src/librbd/io/ImageRequest.cc b/src/ceph/src/librbd/io/ImageRequest.cc new file mode 100644 index 0000000..434c208 --- /dev/null +++ b/src/ceph/src/librbd/io/ImageRequest.cc @@ -0,0 +1,1020 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/io/ImageRequest.h" +#include "librbd/ImageCtx.h" +#include "librbd/internal.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/cache/ImageCache.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ObjectRequest.h" +#include "librbd/journal/Types.h" +#include "include/rados/librados.hpp" +#include "common/WorkQueue.h" +#include "osdc/Striper.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \ + << " " << __func__ << ": " + +namespace librbd { +namespace io { + +using util::get_image_ctx; + +namespace { + +template +struct C_DiscardJournalCommit : public Context { + typedef std::vector ObjectExtents; + + ImageCtxT &image_ctx; + AioCompletion *aio_comp; + ObjectExtents object_extents; + + C_DiscardJournalCommit(ImageCtxT &_image_ctx, AioCompletion *_aio_comp, + const ObjectExtents &_object_extents, uint64_t tid) + : image_ctx(_image_ctx), aio_comp(_aio_comp), + object_extents(_object_extents) { + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << "delaying cache discard until journal tid " << tid << " " + << "safe" << dendl; + + aio_comp->add_request(); + } + + void finish(int r) override { + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << "C_DiscardJournalCommit: " + << "journal committed: discarding from cache" << dendl; + + Mutex::Locker cache_locker(image_ctx.cache_lock); + image_ctx.object_cacher->discard_set(image_ctx.object_set, object_extents); + aio_comp->complete_request(r); + } +}; + +template +struct C_FlushJournalCommit : public Context { + ImageCtxT &image_ctx; + AioCompletion *aio_comp; + + C_FlushJournalCommit(ImageCtxT &_image_ctx, AioCompletion *_aio_comp, + uint64_t tid) + : image_ctx(_image_ctx), aio_comp(_aio_comp) { + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << "delaying flush until journal tid " << tid << " " + << "safe" << dendl; + + aio_comp->add_request(); + } + + void finish(int r) override { + CephContext *cct = image_ctx.cct; + ldout(cct, 20) << "C_FlushJournalCommit: journal committed" << dendl; + aio_comp->complete_request(r); + } +}; + +template +class C_ObjectCacheRead : public Context { +public: + explicit C_ObjectCacheRead(ImageCtxT &ictx, ObjectReadRequest *req) + : m_image_ctx(ictx), m_req(req), m_enqueued(false) {} + + void complete(int r) override { + if (!m_enqueued) { + // cache_lock creates a lock ordering issue -- so re-execute this context + // outside the cache_lock + m_enqueued = true; + m_image_ctx.op_work_queue->queue(this, r); + return; + } + Context::complete(r); + } + +protected: + void finish(int r) override { + m_req->complete(r); + } + +private: + ImageCtxT &m_image_ctx; + ObjectReadRequest *m_req; + bool m_enqueued; +}; + +} // anonymous namespace + +template +ImageRequest* ImageRequest::create_read_request( + I &image_ctx, AioCompletion *aio_comp, Extents &&image_extents, + ReadResult &&read_result, int op_flags, + const ZTracer::Trace &parent_trace) { + return new ImageReadRequest(image_ctx, aio_comp, + std::move(image_extents), + std::move(read_result), op_flags, + parent_trace); +} + +template +ImageRequest* ImageRequest::create_write_request( + I &image_ctx, AioCompletion *aio_comp, Extents &&image_extents, + bufferlist &&bl, int op_flags, const ZTracer::Trace &parent_trace) { + return new ImageWriteRequest(image_ctx, aio_comp, std::move(image_extents), + std::move(bl), op_flags, parent_trace); +} + +template +ImageRequest* ImageRequest::create_discard_request( + I &image_ctx, AioCompletion *aio_comp, uint64_t off, uint64_t len, + bool skip_partial_discard, const ZTracer::Trace &parent_trace) { + return new ImageDiscardRequest(image_ctx, aio_comp, off, len, + skip_partial_discard, parent_trace); +} + +template +ImageRequest* ImageRequest::create_flush_request( + I &image_ctx, AioCompletion *aio_comp, + const ZTracer::Trace &parent_trace) { + return new ImageFlushRequest(image_ctx, aio_comp, parent_trace); +} + +template +ImageRequest* ImageRequest::create_writesame_request( + I &image_ctx, AioCompletion *aio_comp, uint64_t off, uint64_t len, + bufferlist &&bl, int op_flags, const ZTracer::Trace &parent_trace) { + return new ImageWriteSameRequest(image_ctx, aio_comp, off, len, + std::move(bl), op_flags, parent_trace); +} + +template +ImageRequest* ImageRequest::create_compare_and_write_request( + I &image_ctx, AioCompletion *c, Extents &&image_extents, + bufferlist &&cmp_bl, bufferlist &&bl, uint64_t *mismatch_offset, + int op_flags, const ZTracer::Trace &parent_trace) { + return new ImageCompareAndWriteRequest(image_ctx, c, + std::move(image_extents), + std::move(cmp_bl), + std::move(bl), mismatch_offset, + op_flags, parent_trace); +} + +template +void ImageRequest::aio_read(I *ictx, AioCompletion *c, + Extents &&image_extents, + ReadResult &&read_result, int op_flags, + const ZTracer::Trace &parent_trace) { + ImageReadRequest req(*ictx, c, std::move(image_extents), + std::move(read_result), op_flags, parent_trace); + req.send(); +} + +template +void ImageRequest::aio_write(I *ictx, AioCompletion *c, + Extents &&image_extents, bufferlist &&bl, + int op_flags, + const ZTracer::Trace &parent_trace) { + ImageWriteRequest req(*ictx, c, std::move(image_extents), std::move(bl), + op_flags, parent_trace); + req.send(); +} + +template +void ImageRequest::aio_discard(I *ictx, AioCompletion *c, + uint64_t off, uint64_t len, + bool skip_partial_discard, + const ZTracer::Trace &parent_trace) { + ImageDiscardRequest req(*ictx, c, off, len, skip_partial_discard, + parent_trace); + req.send(); +} + +template +void ImageRequest::aio_flush(I *ictx, AioCompletion *c, + const ZTracer::Trace &parent_trace) { + ImageFlushRequest req(*ictx, c, parent_trace); + req.send(); +} + +template +void ImageRequest::aio_writesame(I *ictx, AioCompletion *c, + uint64_t off, uint64_t len, + bufferlist &&bl, int op_flags, + const ZTracer::Trace &parent_trace) { + ImageWriteSameRequest req(*ictx, c, off, len, std::move(bl), op_flags, + parent_trace); + req.send(); +} + +template +void ImageRequest::aio_compare_and_write(I *ictx, AioCompletion *c, + Extents &&image_extents, + bufferlist &&cmp_bl, + bufferlist &&bl, + uint64_t *mismatch_offset, + int op_flags, + const ZTracer::Trace &parent_trace) { + ImageCompareAndWriteRequest req(*ictx, c, std::move(image_extents), + std::move(cmp_bl), std::move(bl), + mismatch_offset, op_flags, parent_trace); + req.send(); +} + + +template +void ImageRequest::send() { + I &image_ctx = this->m_image_ctx; + assert(m_aio_comp->is_initialized(get_aio_type())); + assert(m_aio_comp->is_started() ^ (get_aio_type() == AIO_TYPE_FLUSH)); + + CephContext *cct = image_ctx.cct; + AioCompletion *aio_comp = this->m_aio_comp; + ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", " + << "completion=" << aio_comp << dendl; + + aio_comp->get(); + int r = clip_request(); + if (r < 0) { + m_aio_comp->fail(r); + return; + } + + if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) { + send_request(); + } else { + send_image_cache_request(); + } +} + +template +int ImageRequest::clip_request() { + RWLock::RLocker snap_locker(m_image_ctx.snap_lock); + for (auto &image_extent : m_image_extents) { + auto clip_len = image_extent.second; + int r = clip_io(get_image_ctx(&m_image_ctx), image_extent.first, &clip_len); + if (r < 0) { + return r; + } + + image_extent.second = clip_len; + } + return 0; +} + +template +void ImageRequest::start_op() { + m_aio_comp->start_op(); +} + +template +void ImageRequest::fail(int r) { + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->get(); + aio_comp->fail(r); +} + +template +ImageReadRequest::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp, + Extents &&image_extents, + ReadResult &&read_result, int op_flags, + const ZTracer::Trace &parent_trace) + : ImageRequest(image_ctx, aio_comp, std::move(image_extents), "read", + parent_trace), + m_op_flags(op_flags) { + aio_comp->read_result = std::move(read_result); +} + +template +int ImageReadRequest::clip_request() { + int r = ImageRequest::clip_request(); + if (r < 0) { + return r; + } + + uint64_t buffer_length = 0; + auto &image_extents = this->m_image_extents; + for (auto &image_extent : image_extents) { + buffer_length += image_extent.second; + } + this->m_aio_comp->read_result.set_clip_length(buffer_length); + return 0; +} + +template +void ImageReadRequest::send_request() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + auto &image_extents = this->m_image_extents; + if (image_ctx.object_cacher && image_ctx.readahead_max_bytes > 0 && + !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) { + readahead(get_image_ctx(&image_ctx), image_extents); + } + + AioCompletion *aio_comp = this->m_aio_comp; + librados::snap_t snap_id; + map > object_extents; + uint64_t buffer_ofs = 0; + { + // prevent image size from changing between computing clip and recording + // pending async operation + RWLock::RLocker snap_locker(image_ctx.snap_lock); + snap_id = image_ctx.snap_id; + + // map image extents to object extents + for (auto &extent : image_extents) { + if (extent.second == 0) { + continue; + } + + Striper::file_to_extents(cct, image_ctx.format_string, &image_ctx.layout, + extent.first, extent.second, 0, object_extents, + buffer_ofs); + buffer_ofs += extent.second; + } + } + + // pre-calculate the expected number of read requests + uint32_t request_count = 0; + for (auto &object_extent : object_extents) { + request_count += object_extent.second.size(); + } + aio_comp->set_request_count(request_count); + + // issue the requests + for (auto &object_extent : object_extents) { + for (auto &extent : object_extent.second) { + ldout(cct, 20) << "oid " << extent.oid << " " << extent.offset << "~" + << extent.length << " from " << extent.buffer_extents + << dendl; + + auto req_comp = new io::ReadResult::C_SparseReadRequest( + aio_comp); + ObjectReadRequest *req = ObjectReadRequest::create( + &image_ctx, extent.oid.name, extent.objectno, extent.offset, + extent.length, extent.buffer_extents, snap_id, true, m_op_flags, + this->m_trace, req_comp); + req_comp->request = req; + + if (image_ctx.object_cacher) { + C_ObjectCacheRead *cache_comp = new C_ObjectCacheRead(image_ctx, + req); + image_ctx.aio_read_from_cache( + extent.oid, extent.objectno, &req->data(), extent.length, + extent.offset, cache_comp, m_op_flags, + (this->m_trace.valid() ? &this->m_trace : nullptr)); + } else { + req->send(); + } + } + } + + aio_comp->put(); + + image_ctx.perfcounter->inc(l_librbd_rd); + image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs); +} + +template +void ImageReadRequest::send_image_cache_request() { + I &image_ctx = this->m_image_ctx; + assert(image_ctx.image_cache != nullptr); + + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->set_request_count(1); + + auto *req_comp = new io::ReadResult::C_ImageReadRequest( + aio_comp, this->m_image_extents); + image_ctx.image_cache->aio_read(std::move(this->m_image_extents), + &req_comp->bl, m_op_flags, + req_comp); +} + +template +void AbstractImageWriteRequest::send_request() { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + RWLock::RLocker md_locker(image_ctx.md_lock); + + bool journaling = false; + + AioCompletion *aio_comp = this->m_aio_comp; + uint64_t clip_len = 0; + ObjectExtents object_extents; + ::SnapContext snapc; + { + // prevent image size from changing between computing clip and recording + // pending async operation + RWLock::RLocker snap_locker(image_ctx.snap_lock); + if (image_ctx.snap_id != CEPH_NOSNAP || image_ctx.read_only) { + aio_comp->fail(-EROFS); + return; + } + + for (auto &extent : this->m_image_extents) { + if (extent.second == 0) { + continue; + } + + // map to object extents + Striper::file_to_extents(cct, image_ctx.format_string, &image_ctx.layout, + extent.first, extent.second, 0, object_extents); + clip_len += extent.second; + } + + snapc = image_ctx.snapc; + journaling = (image_ctx.journal != nullptr && + image_ctx.journal->is_journal_appending()); + } + + int ret = prune_object_extents(object_extents); + if (ret < 0) { + aio_comp->fail(ret); + return; + } + + if (!object_extents.empty()) { + uint64_t journal_tid = 0; + aio_comp->set_request_count( + object_extents.size() + get_object_cache_request_count(journaling)); + + ObjectRequests requests; + send_object_requests(object_extents, snapc, + (journaling ? &requests : nullptr)); + + if (journaling) { + // in-flight ops are flushed prior to closing the journal + assert(image_ctx.journal != NULL); + journal_tid = append_journal_event(requests, m_synchronous); + } + + if (image_ctx.object_cacher != NULL) { + send_object_cache_requests(object_extents, journal_tid); + } + } else { + // no IO to perform -- fire completion + aio_comp->unblock(); + } + + update_stats(clip_len); + aio_comp->put(); +} + +template +void AbstractImageWriteRequest::send_object_requests( + const ObjectExtents &object_extents, const ::SnapContext &snapc, + ObjectRequests *object_requests) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + + AioCompletion *aio_comp = this->m_aio_comp; + for (ObjectExtents::const_iterator p = object_extents.begin(); + p != object_extents.end(); ++p) { + ldout(cct, 20) << "oid " << p->oid << " " << p->offset << "~" << p->length + << " from " << p->buffer_extents << dendl; + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + ObjectRequestHandle *request = create_object_request(*p, snapc, + req_comp); + + // if journaling, stash the request for later; otherwise send + if (request != NULL) { + if (object_requests != NULL) { + object_requests->push_back(request); + } else { + request->send(); + } + } + } +} + +template +void ImageWriteRequest::assemble_extent(const ObjectExtent &object_extent, + bufferlist *bl) { + for (auto q = object_extent.buffer_extents.begin(); + q != object_extent.buffer_extents.end(); ++q) { + bufferlist sub_bl; + sub_bl.substr_of(m_bl, q->first, q->second); + bl->claim_append(sub_bl); + } +} + +template +uint64_t ImageWriteRequest::append_journal_event( + const ObjectRequests &requests, bool synchronous) { + I &image_ctx = this->m_image_ctx; + + uint64_t tid = 0; + uint64_t buffer_offset = 0; + assert(!this->m_image_extents.empty()); + for (auto &extent : this->m_image_extents) { + bufferlist sub_bl; + sub_bl.substr_of(m_bl, buffer_offset, extent.second); + buffer_offset += extent.second; + + tid = image_ctx.journal->append_write_event(extent.first, extent.second, + sub_bl, requests, synchronous); + } + + if (image_ctx.object_cacher == NULL) { + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->associate_journal_event(tid); + } + return tid; +} + +template +void ImageWriteRequest::send_image_cache_request() { + I &image_ctx = this->m_image_ctx; + assert(image_ctx.image_cache != nullptr); + + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->set_request_count(1); + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + image_ctx.image_cache->aio_write(std::move(this->m_image_extents), + std::move(m_bl), m_op_flags, req_comp); +} + +template +void ImageWriteRequest::send_object_cache_requests( + const ObjectExtents &object_extents, uint64_t journal_tid) { + I &image_ctx = this->m_image_ctx; + for (auto p = object_extents.begin(); p != object_extents.end(); ++p) { + const ObjectExtent &object_extent = *p; + + bufferlist bl; + assemble_extent(object_extent, &bl); + + AioCompletion *aio_comp = this->m_aio_comp; + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + image_ctx.write_to_cache( + object_extent.oid, bl, object_extent.length, object_extent.offset, + req_comp, m_op_flags, journal_tid, + (this->m_trace.valid() ? &this->m_trace : nullptr)); + } +} + +template +void ImageWriteRequest::send_object_requests( + const ObjectExtents &object_extents, const ::SnapContext &snapc, + ObjectRequests *object_requests) { + I &image_ctx = this->m_image_ctx; + + // cache handles creating object requests during writeback + if (image_ctx.object_cacher == NULL) { + AbstractImageWriteRequest::send_object_requests(object_extents, snapc, + object_requests); + } +} + +template +ObjectRequestHandle *ImageWriteRequest::create_object_request( + const ObjectExtent &object_extent, const ::SnapContext &snapc, + Context *on_finish) { + I &image_ctx = this->m_image_ctx; + assert(image_ctx.object_cacher == NULL); + + bufferlist bl; + assemble_extent(object_extent, &bl); + ObjectRequest *req = ObjectRequest::create_write( + &image_ctx, object_extent.oid.name, object_extent.objectno, + object_extent.offset, bl, snapc, m_op_flags, this->m_trace, on_finish); + return req; +} + +template +void ImageWriteRequest::update_stats(size_t length) { + I &image_ctx = this->m_image_ctx; + image_ctx.perfcounter->inc(l_librbd_wr); + image_ctx.perfcounter->inc(l_librbd_wr_bytes, length); +} + +template +uint64_t ImageDiscardRequest::append_journal_event( + const ObjectRequests &requests, bool synchronous) { + I &image_ctx = this->m_image_ctx; + + uint64_t tid = 0; + assert(!this->m_image_extents.empty()); + for (auto &extent : this->m_image_extents) { + journal::EventEntry event_entry(journal::AioDiscardEvent(extent.first, + extent.second, + this->m_skip_partial_discard)); + tid = image_ctx.journal->append_io_event(std::move(event_entry), + requests, extent.first, + extent.second, synchronous); + } + + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->associate_journal_event(tid); + return tid; +} + +template +int ImageDiscardRequest::prune_object_extents(ObjectExtents &object_extents) { + I &image_ctx = this->m_image_ctx; + CephContext *cct = image_ctx.cct; + if (!this->m_skip_partial_discard) { + return 0; + } + + for (auto p = object_extents.begin(); p != object_extents.end(); ) { + if (p->offset + p->length < image_ctx.layout.object_size) { + ldout(cct, 20) << "oid " << p->oid << " " << p->offset << "~" + << p->length << " from " << p->buffer_extents + << ": skip partial discard" << dendl; + p = object_extents.erase(p); + } else { + ++p; + } + } + + return 0; +} + +template +uint32_t ImageDiscardRequest::get_object_cache_request_count(bool journaling) const { + // extra completion request is required for tracking journal commit + I &image_ctx = this->m_image_ctx; + return (image_ctx.object_cacher != nullptr && journaling ? 1 : 0); +} + +template +void ImageDiscardRequest::send_image_cache_request() { + I &image_ctx = this->m_image_ctx; + assert(image_ctx.image_cache != nullptr); + + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->set_request_count(this->m_image_extents.size()); + for (auto &extent : this->m_image_extents) { + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + image_ctx.image_cache->aio_discard(extent.first, extent.second, + this->m_skip_partial_discard, req_comp); + } +} + +template +void ImageDiscardRequest::send_object_cache_requests( + const ObjectExtents &object_extents, uint64_t journal_tid) { + I &image_ctx = this->m_image_ctx; + if (journal_tid == 0) { + Mutex::Locker cache_locker(image_ctx.cache_lock); + image_ctx.object_cacher->discard_set(image_ctx.object_set, + object_extents); + } else { + // cannot discard from cache until journal has committed + assert(image_ctx.journal != NULL); + AioCompletion *aio_comp = this->m_aio_comp; + image_ctx.journal->wait_event( + journal_tid, new C_DiscardJournalCommit(image_ctx, aio_comp, + object_extents, journal_tid)); + } +} + +template +ObjectRequestHandle *ImageDiscardRequest::create_object_request( + const ObjectExtent &object_extent, const ::SnapContext &snapc, + Context *on_finish) { + I &image_ctx = this->m_image_ctx; + + ObjectRequest *req; + if (object_extent.length == image_ctx.layout.object_size) { + req = ObjectRequest::create_remove( + &image_ctx, object_extent.oid.name, object_extent.objectno, snapc, + this->m_trace, on_finish); + } else if (object_extent.offset + object_extent.length == + image_ctx.layout.object_size) { + req = ObjectRequest::create_truncate( + &image_ctx, object_extent.oid.name, object_extent.objectno, + object_extent.offset, snapc, this->m_trace, on_finish); + } else { + req = ObjectRequest::create_zero( + &image_ctx, object_extent.oid.name, object_extent.objectno, + object_extent.offset, object_extent.length, snapc, + this->m_trace, on_finish); + } + return req; +} + +template +void ImageDiscardRequest::update_stats(size_t length) { + I &image_ctx = this->m_image_ctx; + image_ctx.perfcounter->inc(l_librbd_discard); + image_ctx.perfcounter->inc(l_librbd_discard_bytes, length); +} + +template +void ImageFlushRequest::send_request() { + I &image_ctx = this->m_image_ctx; + image_ctx.user_flushed(); + + bool journaling = false; + { + RWLock::RLocker snap_locker(image_ctx.snap_lock); + journaling = (image_ctx.journal != nullptr && + image_ctx.journal->is_journal_appending()); + } + + AioCompletion *aio_comp = this->m_aio_comp; + if (journaling) { + // in-flight ops are flushed prior to closing the journal + uint64_t journal_tid = image_ctx.journal->append_io_event( + journal::EventEntry(journal::AioFlushEvent()), + ObjectRequests(), 0, 0, false); + + aio_comp->set_request_count(1); + aio_comp->associate_journal_event(journal_tid); + + FunctionContext *flush_ctx = new FunctionContext( + [aio_comp, &image_ctx, journal_tid] (int r) { + auto ctx = new C_FlushJournalCommit(image_ctx, aio_comp, + journal_tid); + image_ctx.journal->flush_event(journal_tid, ctx); + + // track flush op for block writes + aio_comp->start_op(true); + aio_comp->put(); + }); + + image_ctx.flush_async_operations(flush_ctx); + } else { + // flush rbd cache only when journaling is not enabled + aio_comp->set_request_count(1); + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + image_ctx.flush(req_comp); + + aio_comp->start_op(true); + aio_comp->put(); + } + + image_ctx.perfcounter->inc(l_librbd_aio_flush); +} + +template +void ImageFlushRequest::send_image_cache_request() { + I &image_ctx = this->m_image_ctx; + assert(image_ctx.image_cache != nullptr); + + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->set_request_count(1); + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + image_ctx.image_cache->aio_flush(req_comp); +} + +template +bool ImageWriteSameRequest::assemble_writesame_extent(const ObjectExtent &object_extent, + bufferlist *bl, bool force_write) { + size_t m_data_len = m_data_bl.length(); + + if (!force_write) { + bool may_writesame = true; + + for (auto q = object_extent.buffer_extents.begin(); + q != object_extent.buffer_extents.end(); ++q) { + if (!(q->first % m_data_len == 0 && q->second % m_data_len == 0)) { + may_writesame = false; + break; + } + } + + if (may_writesame) { + bl->append(m_data_bl); + return true; + } + } + + for (auto q = object_extent.buffer_extents.begin(); + q != object_extent.buffer_extents.end(); ++q) { + bufferlist sub_bl; + uint64_t sub_off = q->first % m_data_len; + uint64_t sub_len = m_data_len - sub_off; + uint64_t extent_left = q->second; + while (extent_left >= sub_len) { + sub_bl.substr_of(m_data_bl, sub_off, sub_len); + bl->claim_append(sub_bl); + extent_left -= sub_len; + if (sub_off) { + sub_off = 0; + sub_len = m_data_len; + } + } + if (extent_left) { + sub_bl.substr_of(m_data_bl, sub_off, extent_left); + bl->claim_append(sub_bl); + } + } + return false; +} + +template +uint64_t ImageWriteSameRequest::append_journal_event( + const ObjectRequests &requests, bool synchronous) { + I &image_ctx = this->m_image_ctx; + + uint64_t tid = 0; + assert(!this->m_image_extents.empty()); + for (auto &extent : this->m_image_extents) { + journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first, + extent.second, + m_data_bl)); + tid = image_ctx.journal->append_io_event(std::move(event_entry), + requests, extent.first, + extent.second, synchronous); + } + + if (image_ctx.object_cacher == NULL) { + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->associate_journal_event(tid); + } + return tid; +} + +template +void ImageWriteSameRequest::send_image_cache_request() { + I &image_ctx = this->m_image_ctx; + assert(image_ctx.image_cache != nullptr); + + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->set_request_count(this->m_image_extents.size()); + for (auto &extent : this->m_image_extents) { + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + image_ctx.image_cache->aio_writesame(extent.first, extent.second, + std::move(m_data_bl), m_op_flags, + req_comp); + } +} + +template +void ImageWriteSameRequest::send_object_cache_requests( + const ObjectExtents &object_extents, uint64_t journal_tid) { + I &image_ctx = this->m_image_ctx; + for (auto p = object_extents.begin(); p != object_extents.end(); ++p) { + const ObjectExtent &object_extent = *p; + + bufferlist bl; + assemble_writesame_extent(object_extent, &bl, true); + + AioCompletion *aio_comp = this->m_aio_comp; + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + image_ctx.write_to_cache( + object_extent.oid, bl, object_extent.length, object_extent.offset, + req_comp, m_op_flags, journal_tid, + (this->m_trace.valid() ? &this->m_trace : nullptr)); + } +} + +template +void ImageWriteSameRequest::send_object_requests( + const ObjectExtents &object_extents, const ::SnapContext &snapc, + ObjectRequests *object_requests) { + I &image_ctx = this->m_image_ctx; + + // cache handles creating object requests during writeback + if (image_ctx.object_cacher == NULL) { + AbstractImageWriteRequest::send_object_requests(object_extents, snapc, + object_requests); + } +} + +template +ObjectRequestHandle *ImageWriteSameRequest::create_object_request( + const ObjectExtent &object_extent, const ::SnapContext &snapc, + Context *on_finish) { + I &image_ctx = this->m_image_ctx; + assert(image_ctx.object_cacher == NULL); + + bufferlist bl; + ObjectRequest *req; + + if (assemble_writesame_extent(object_extent, &bl, false)) { + req = ObjectRequest::create_writesame( + &image_ctx, object_extent.oid.name, object_extent.objectno, + object_extent.offset, object_extent.length, + bl, snapc, m_op_flags, this->m_trace, on_finish); + return req; + } + req = ObjectRequest::create_write( + &image_ctx, object_extent.oid.name, object_extent.objectno, + object_extent.offset, bl, snapc, m_op_flags, this->m_trace, on_finish); + return req; +} + +template +void ImageWriteSameRequest::update_stats(size_t length) { + I &image_ctx = this->m_image_ctx; + image_ctx.perfcounter->inc(l_librbd_ws); + image_ctx.perfcounter->inc(l_librbd_ws_bytes, length); +} + +template +uint64_t ImageCompareAndWriteRequest::append_journal_event( + const ObjectRequests &requests, bool synchronous) { + + I &image_ctx = this->m_image_ctx; + + uint64_t tid = 0; + assert(this->m_image_extents.size() == 1); + auto &extent = this->m_image_extents.front(); + journal::EventEntry event_entry(journal::AioCompareAndWriteEvent(extent.first, + extent.second, + m_cmp_bl, m_bl)); + tid = image_ctx.journal->append_io_event(std::move(event_entry), + requests, extent.first, + extent.second, synchronous); + + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->associate_journal_event(tid); + + return tid; +} + +template +void ImageCompareAndWriteRequest::send_object_cache_requests( + const ObjectExtents &object_extents, uint64_t journal_tid) { + I &image_ctx = this->m_image_ctx; + + if (image_ctx.object_cacher != NULL) { + Mutex::Locker cache_locker(image_ctx.cache_lock); + image_ctx.object_cacher->discard_set(image_ctx.object_set, + object_extents); + } +} + +template +void ImageCompareAndWriteRequest::assemble_extent( + const ObjectExtent &object_extent, bufferlist *bl) { + for (auto q = object_extent.buffer_extents.begin(); + q != object_extent.buffer_extents.end(); ++q) { + bufferlist sub_bl; + sub_bl.substr_of(m_bl, q->first, q->second); + bl->claim_append(sub_bl); + } +} + +template +void ImageCompareAndWriteRequest::send_image_cache_request() { + I &image_ctx = this->m_image_ctx; + assert(image_ctx.image_cache != nullptr); + + AioCompletion *aio_comp = this->m_aio_comp; + aio_comp->set_request_count(1); + C_AioRequest *req_comp = new C_AioRequest(aio_comp); + image_ctx.image_cache->aio_compare_and_write( + std::move(this->m_image_extents), std::move(m_cmp_bl), std::move(m_bl), + m_mismatch_offset, m_op_flags, req_comp); +} + +template +ObjectRequestHandle *ImageCompareAndWriteRequest::create_object_request( + const ObjectExtent &object_extent, + const ::SnapContext &snapc, + Context *on_finish) { + I &image_ctx = this->m_image_ctx; + + bufferlist bl; + assemble_extent(object_extent, &bl); + ObjectRequest *req = ObjectRequest::create_compare_and_write( + &image_ctx, object_extent.oid.name, + object_extent.objectno, object_extent.offset, + m_cmp_bl, bl, snapc, m_mismatch_offset, + m_op_flags, this->m_trace, on_finish); + return req; +} + +template +void ImageCompareAndWriteRequest::update_stats(size_t length) { + I &image_ctx = this->m_image_ctx; + image_ctx.perfcounter->inc(l_librbd_cmp); + image_ctx.perfcounter->inc(l_librbd_cmp_bytes, length); +} + +template +int ImageCompareAndWriteRequest::prune_object_extents(ObjectExtents &object_extents) { + if (object_extents.size() > 1) + return -EINVAL; + + I &image_ctx = this->m_image_ctx; + uint64_t sector_size = 512ULL; + uint64_t su = image_ctx.layout.stripe_unit; + ObjectExtent object_extent = object_extents.front(); + if (object_extent.offset % sector_size + object_extent.length > sector_size || + (su != 0 && (object_extent.offset % su + object_extent.length > su))) + return -EINVAL; + + return 0; +} + +} // namespace io +} // namespace librbd + +template class librbd::io::ImageRequest; +template class librbd::io::ImageReadRequest; +template class librbd::io::AbstractImageWriteRequest; +template class librbd::io::ImageWriteRequest; +template class librbd::io::ImageDiscardRequest; +template class librbd::io::ImageFlushRequest; +template class librbd::io::ImageWriteSameRequest; +template class librbd::io::ImageCompareAndWriteRequest;