// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab #ifndef CEPH_LIBRBD_IO_OBJECT_REQUEST_H #define CEPH_LIBRBD_IO_OBJECT_REQUEST_H #include "include/int_types.h" #include "include/buffer.h" #include "include/rados/librados.hpp" #include "common/snap_types.h" #include "common/zipkin_trace.h" #include "librbd/ObjectMap.h" #include class Context; namespace librbd { struct ImageCtx; namespace io { struct AioCompletion; class CopyupRequest; class ObjectRemoveRequest; class ObjectTruncateRequest; class ObjectWriteRequest; class ObjectZeroRequest; struct ObjectRequestHandle { virtual ~ObjectRequestHandle() { } virtual void complete(int r) = 0; virtual void send() = 0; }; /** * This class represents an I/O operation to a single RBD data object. * Its subclasses encapsulate logic for dealing with special cases * for I/O due to layering. */ template class ObjectRequest : public ObjectRequestHandle { public: typedef std::vector > Extents; static ObjectRequest* create_remove(ImageCtxT *ictx, const std::string &oid, uint64_t object_no, const ::SnapContext &snapc, const ZTracer::Trace &parent_trace, Context *completion); static ObjectRequest* create_truncate(ImageCtxT *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, const ::SnapContext &snapc, const ZTracer::Trace &parent_trace, Context *completion); static ObjectRequest* create_trim(ImageCtxT *ictx, const std::string &oid, uint64_t object_no, const ::SnapContext &snapc, bool post_object_map_update, Context *completion); static ObjectRequest* create_write(ImageCtxT *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, const ceph::bufferlist &data, const ::SnapContext &snapc, int op_flags, const ZTracer::Trace &parent_trace, Context *completion); static ObjectRequest* create_zero(ImageCtxT *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, uint64_t object_len, const ::SnapContext &snapc, const ZTracer::Trace &parent_trace, Context *completion); static ObjectRequest* create_writesame(ImageCtxT *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, uint64_t object_len, const ceph::bufferlist &data, const ::SnapContext &snapc, int op_flags, const ZTracer::Trace &parent_trace, Context *completion); static ObjectRequest* create_compare_and_write(ImageCtxT *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, const ceph::bufferlist &cmp_data, const ceph::bufferlist &write_data, const ::SnapContext &snapc, uint64_t *mismatch_offset, int op_flags, const ZTracer::Trace &parent_trace, Context *completion); ObjectRequest(ImageCtx *ictx, const std::string &oid, uint64_t objectno, uint64_t off, uint64_t len, librados::snap_t snap_id, bool hide_enoent, const char *trace_name, const ZTracer::Trace &parent_trace, Context *completion); ~ObjectRequest() override { m_trace.event("finish"); } virtual void add_copyup_ops(librados::ObjectWriteOperation *wr, bool set_hints) { }; virtual void complete(int r); virtual bool should_complete(int r) = 0; void send() override = 0; bool has_parent() const { return m_has_parent; } virtual bool is_op_payload_empty() const { return false; } virtual const char *get_op_type() const = 0; virtual bool pre_object_map_update(uint8_t *new_state) = 0; protected: bool compute_parent_extents(); ImageCtx *m_ictx; std::string m_oid; uint64_t m_object_no, m_object_off, m_object_len; librados::snap_t m_snap_id; Context *m_completion; Extents m_parent_extents; bool m_hide_enoent; ZTracer::Trace m_trace; private: bool m_has_parent = false; }; template class ObjectReadRequest : public ObjectRequest { public: typedef std::vector > Extents; typedef std::map ExtentMap; static ObjectReadRequest* create(ImageCtxT *ictx, const std::string &oid, uint64_t objectno, uint64_t offset, uint64_t len, Extents &buffer_extents, librados::snap_t snap_id, bool sparse, int op_flags, const ZTracer::Trace &parent_trace, Context *completion) { return new ObjectReadRequest(ictx, oid, objectno, offset, len, buffer_extents, snap_id, sparse, op_flags, parent_trace, completion); } ObjectReadRequest(ImageCtxT *ictx, const std::string &oid, uint64_t objectno, uint64_t offset, uint64_t len, Extents& buffer_extents, librados::snap_t snap_id, bool sparse, int op_flags, const ZTracer::Trace &parent_trace, Context *completion); bool should_complete(int r) override; void send() override; void guard_read(); inline uint64_t get_offset() const { return this->m_object_off; } inline uint64_t get_length() const { return this->m_object_len; } ceph::bufferlist &data() { return m_read_data; } const Extents &get_buffer_extents() const { return m_buffer_extents; } ExtentMap &get_extent_map() { return m_ext_map; } const char *get_op_type() const override { return "read"; } bool pre_object_map_update(uint8_t *new_state) override { return false; } private: Extents m_buffer_extents; bool m_tried_parent; bool m_sparse; int m_op_flags; ceph::bufferlist m_read_data; ExtentMap m_ext_map; /** * Reads go through the following state machine to deal with * layering: * * need copyup * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP * | | * v | * done <------------------------------------/ * ^ * | * LIBRBD_AIO_READ_FLAT * * Reads start in LIBRBD_AIO_READ_GUARD or _FLAT, depending on * whether there is a parent or not. */ enum read_state_d { LIBRBD_AIO_READ_GUARD, LIBRBD_AIO_READ_COPYUP, LIBRBD_AIO_READ_FLAT }; read_state_d m_state; void send_copyup(); void read_from_parent(Extents&& image_extents); }; class AbstractObjectWriteRequest : public ObjectRequest<> { public: AbstractObjectWriteRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, uint64_t len, const ::SnapContext &snapc, bool hide_enoent, const char *trace_name, const ZTracer::Trace &parent_trace, Context *completion); void add_copyup_ops(librados::ObjectWriteOperation *wr, bool set_hints) override { add_write_ops(wr, set_hints); } bool should_complete(int r) override; void send() override; /** * Writes go through the following state machine to deal with * layering and the object map: * * * | * |\ * | \ -or- * | ---------------------------------> LIBRBD_AIO_WRITE_PRE * | . | * | . | * | . v * | . . . . > LIBRBD_AIO_WRITE_FLAT. . . * | | . * | | . * | | . * v need copyup (copyup performs pre) | . * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | . * . | | . | . * . | | . | . * . | /-----/ . | . * . | | . | . * . \-------------------\ | /-------------------/ . * . | | | . . * . v v v . . * . LIBRBD_AIO_WRITE_POST . . * . | . . * . | . . . . . . . . . * . | . . * . v v . * . . . . . . . . . . . . . . > < . . . . . . . . . . . . . . * * The _PRE/_POST states are skipped if the object map is disabled. * The write starts in _WRITE_GUARD or _FLAT depending on whether or not * there is a parent overlap. */ protected: enum write_state_d { LIBRBD_AIO_WRITE_GUARD, LIBRBD_AIO_WRITE_COPYUP, LIBRBD_AIO_WRITE_FLAT, LIBRBD_AIO_WRITE_PRE, LIBRBD_AIO_WRITE_POST, LIBRBD_AIO_WRITE_ERROR }; write_state_d m_state; librados::ObjectWriteOperation m_write; uint64_t m_snap_seq; std::vector m_snaps; bool m_object_exist; bool m_guard = true; virtual void add_write_ops(librados::ObjectWriteOperation *wr, bool set_hints) = 0; virtual void guard_write(); virtual bool post_object_map_update() { return false; } virtual void send_write(); virtual void send_write_op(); virtual void handle_write_guard(); void send_pre_object_map_update(); private: bool send_post_object_map_update(); void send_copyup(); }; class ObjectWriteRequest : public AbstractObjectWriteRequest { public: ObjectWriteRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, const ceph::bufferlist &data, const ::SnapContext &snapc, int op_flags, const ZTracer::Trace &parent_trace, Context *completion) : AbstractObjectWriteRequest(ictx, oid, object_no, object_off, data.length(), snapc, false, "write", parent_trace, completion), m_write_data(data), m_op_flags(op_flags) { } bool is_op_payload_empty() const override { return (m_write_data.length() == 0); } const char *get_op_type() const override { return "write"; } bool pre_object_map_update(uint8_t *new_state) override { *new_state = OBJECT_EXISTS; return true; } protected: void add_write_ops(librados::ObjectWriteOperation *wr, bool set_hints) override; void send_write() override; private: ceph::bufferlist m_write_data; int m_op_flags; }; class ObjectRemoveRequest : public AbstractObjectWriteRequest { public: ObjectRemoveRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no, const ::SnapContext &snapc, const ZTracer::Trace &parent_trace, Context *completion) : AbstractObjectWriteRequest(ictx, oid, object_no, 0, 0, snapc, true, "remote", parent_trace, completion), m_object_state(OBJECT_NONEXISTENT) { } const char* get_op_type() const override { if (has_parent()) { return "remove (trunc)"; } return "remove"; } bool pre_object_map_update(uint8_t *new_state) override { if (has_parent()) { m_object_state = OBJECT_EXISTS; } else { m_object_state = OBJECT_PENDING; } *new_state = m_object_state; return true; } bool post_object_map_update() override { if (m_object_state == OBJECT_EXISTS) { return false; } return true; } void guard_write() override; void send_write() override; protected: void add_write_ops(librados::ObjectWriteOperation *wr, bool set_hints) override { if (has_parent()) { wr->truncate(0); } else { wr->remove(); } } private: uint8_t m_object_state; }; class ObjectTrimRequest : public AbstractObjectWriteRequest { public: // we'd need to only conditionally specify if a post object map // update is needed. pre update is decided as usual (by checking // the state of the object in the map). ObjectTrimRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no, const ::SnapContext &snapc, bool post_object_map_update, Context *completion) : AbstractObjectWriteRequest(ictx, oid, object_no, 0, 0, snapc, true, "trim", {}, completion), m_post_object_map_update(post_object_map_update) { } const char* get_op_type() const override { return "remove (trim)"; } bool pre_object_map_update(uint8_t *new_state) override { *new_state = OBJECT_PENDING; return true; } bool post_object_map_update() override { return m_post_object_map_update; } protected: void add_write_ops(librados::ObjectWriteOperation *wr, bool set_hints) override { wr->remove(); } private: bool m_post_object_map_update; }; class ObjectTruncateRequest : public AbstractObjectWriteRequest { public: ObjectTruncateRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, const ::SnapContext &snapc, const ZTracer::Trace &parent_trace, Context *completion) : AbstractObjectWriteRequest(ictx, oid, object_no, object_off, 0, snapc, true, "truncate", parent_trace, completion) { } const char* get_op_type() const override { return "truncate"; } bool pre_object_map_update(uint8_t *new_state) override { if (!m_object_exist && !has_parent()) *new_state = OBJECT_NONEXISTENT; else *new_state = OBJECT_EXISTS; return true; } void send_write() override; protected: void add_write_ops(librados::ObjectWriteOperation *wr, bool set_hints) override { wr->truncate(m_object_off); } }; class ObjectZeroRequest : public AbstractObjectWriteRequest { public: ObjectZeroRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, uint64_t object_len, const ::SnapContext &snapc, const ZTracer::Trace &parent_trace, Context *completion) : AbstractObjectWriteRequest(ictx, oid, object_no, object_off, object_len, snapc, true, "zero", parent_trace, completion) { } const char* get_op_type() const override { return "zero"; } bool pre_object_map_update(uint8_t *new_state) override { *new_state = OBJECT_EXISTS; return true; } void send_write() override; protected: void add_write_ops(librados::ObjectWriteOperation *wr, bool set_hints) override { wr->zero(m_object_off, m_object_len); } }; class ObjectWriteSameRequest : public AbstractObjectWriteRequest { public: ObjectWriteSameRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, uint64_t object_len, const ceph::bufferlist &data, const ::SnapContext &snapc, int op_flags, const ZTracer::Trace &parent_trace, Context *completion) : AbstractObjectWriteRequest(ictx, oid, object_no, object_off, object_len, snapc, false, "writesame", parent_trace, completion), m_write_data(data), m_op_flags(op_flags) { } const char *get_op_type() const override { return "writesame"; } bool pre_object_map_update(uint8_t *new_state) override { *new_state = OBJECT_EXISTS; return true; } protected: void add_write_ops(librados::ObjectWriteOperation *wr, bool set_hints) override; void send_write() override; private: ceph::bufferlist m_write_data; int m_op_flags; }; class ObjectCompareAndWriteRequest : public AbstractObjectWriteRequest { public: typedef std::vector > Extents; ObjectCompareAndWriteRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, const ceph::bufferlist &cmp_bl, const ceph::bufferlist &write_bl, const ::SnapContext &snapc, uint64_t *mismatch_offset, int op_flags, const ZTracer::Trace &parent_trace, Context *completion) : AbstractObjectWriteRequest(ictx, oid, object_no, object_off, cmp_bl.length(), snapc, false, "compare_and_write", parent_trace, completion), m_cmp_bl(cmp_bl), m_write_bl(write_bl), m_mismatch_offset(mismatch_offset), m_op_flags(op_flags) { } const char *get_op_type() const override { return "compare_and_write"; } bool pre_object_map_update(uint8_t *new_state) override { *new_state = OBJECT_EXISTS; return true; } void complete(int r) override; protected: void add_write_ops(librados::ObjectWriteOperation *wr, bool set_hints) override; void send_write() override; private: ceph::bufferlist m_cmp_bl; ceph::bufferlist m_write_bl; uint64_t *m_mismatch_offset; int m_op_flags; }; } // namespace io } // namespace librbd extern template class librbd::io::ObjectRequest; extern template class librbd::io::ObjectReadRequest; #endif // CEPH_LIBRBD_IO_OBJECT_REQUEST_H