1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #ifndef CEPH_LIBRBD_IO_OBJECT_REQUEST_H
5 #define CEPH_LIBRBD_IO_OBJECT_REQUEST_H
7 #include "include/int_types.h"
8 #include "include/buffer.h"
9 #include "include/rados/librados.hpp"
10 #include "common/snap_types.h"
11 #include "common/zipkin_trace.h"
12 #include "librbd/ObjectMap.h"
25 class ObjectRemoveRequest;
26 class ObjectTruncateRequest;
27 class ObjectWriteRequest;
28 class ObjectZeroRequest;
30 struct ObjectRequestHandle {
31 virtual ~ObjectRequestHandle() {
34 virtual void complete(int r) = 0;
35 virtual void send() = 0;
39 * This class represents an I/O operation to a single RBD data object.
40 * Its subclasses encapsulate logic for dealing with special cases
41 * for I/O due to layering.
43 template <typename ImageCtxT = ImageCtx>
44 class ObjectRequest : public ObjectRequestHandle {
46 typedef std::vector<std::pair<uint64_t, uint64_t> > Extents;
48 static ObjectRequest* create_remove(ImageCtxT *ictx,
49 const std::string &oid,
51 const ::SnapContext &snapc,
52 const ZTracer::Trace &parent_trace,
54 static ObjectRequest* create_truncate(ImageCtxT *ictx,
55 const std::string &oid,
58 const ::SnapContext &snapc,
59 const ZTracer::Trace &parent_trace,
61 static ObjectRequest* create_trim(ImageCtxT *ictx, const std::string &oid,
63 const ::SnapContext &snapc,
64 bool post_object_map_update,
66 static ObjectRequest* create_write(ImageCtxT *ictx, const std::string &oid,
69 const ceph::bufferlist &data,
70 const ::SnapContext &snapc, int op_flags,
71 const ZTracer::Trace &parent_trace,
73 static ObjectRequest* create_zero(ImageCtxT *ictx, const std::string &oid,
74 uint64_t object_no, uint64_t object_off,
76 const ::SnapContext &snapc,
77 const ZTracer::Trace &parent_trace,
79 static ObjectRequest* create_writesame(ImageCtxT *ictx,
80 const std::string &oid,
84 const ceph::bufferlist &data,
85 const ::SnapContext &snapc,
87 const ZTracer::Trace &parent_trace,
89 static ObjectRequest* create_compare_and_write(ImageCtxT *ictx,
90 const std::string &oid,
93 const ceph::bufferlist &cmp_data,
94 const ceph::bufferlist &write_data,
95 const ::SnapContext &snapc,
96 uint64_t *mismatch_offset, int op_flags,
97 const ZTracer::Trace &parent_trace,
// Constructs the state shared by every per-object request.
// Parameter meanings below are inferred from their names and from the
// matching m_* members of this class; confirm against the definition.
//   ictx         - image context the request operates on
//   oid          - name of the data object
//   objectno     - object number (see m_object_no)
//   off, len     - byte range within the object (see m_object_off/_len)
//   snap_id      - snapshot id (see m_snap_id)
//   hide_enoent  - presumably suppresses -ENOENT results; TODO confirm
//   trace_name   - label for this request's trace (see m_trace); the write
//                  request classes in this header supply operation names
//                  such as "write" or "trim" for it
//   parent_trace - parent trace for m_trace
//   completion   - context to signal on completion (see m_completion)
// NOTE(review): this takes ImageCtx* although the class is parameterised on
// ImageCtxT and the create_*() factories take ImageCtxT*; confirm intended.
100 ObjectRequest(ImageCtx *ictx, const std::string &oid,
101 uint64_t objectno, uint64_t off, uint64_t len,
102 librados::snap_t snap_id, bool hide_enoent,
103 const char *trace_name, const ZTracer::Trace &parent_trace,
104 Context *completion);
105 ~ObjectRequest() override {
106 m_trace.event("finish");
109 virtual void add_copyup_ops(librados::ObjectWriteOperation *wr,
// Completion entry point inherited from ObjectRequestHandle; r is presumably
// the result code of the step that just finished (confirm in the .cc).
// Declared with `override` instead of a bare `virtual` so the compiler checks
// the signature against ObjectRequestHandle::complete(int), in line with the
// override-style declarations of send() and the destructor in this class.
// The function stays virtual, so subclasses can still override it.
113 void complete(int r) override;
115 virtual bool should_complete(int r) = 0;
116 void send() override = 0;
118 bool has_parent() const {
122 virtual bool is_op_payload_empty() const {
// Returns a short name for the operation. Overrides in this header return
// string literals such as "remove (trim)" and "compare_and_write".
126 virtual const char *get_op_type() const = 0;
// Implementations store a state constant in *new_state (overrides in this
// header write values such as OBJECT_EXISTS, OBJECT_PENDING or
// OBJECT_NONEXISTENT) -- presumably the object-map state to apply before the
// operation is sent. The meaning of the bool result is not visible in this
// header; TODO confirm against the callers.
127 virtual bool pre_object_map_update(uint8_t *new_state) = 0;
130 bool compute_parent_extents();
134 uint64_t m_object_no, m_object_off, m_object_len;
135 librados::snap_t m_snap_id;
136 Context *m_completion;
137 Extents m_parent_extents;
139 ZTracer::Trace m_trace;
142 bool m_has_parent = false;
145 template <typename ImageCtxT = ImageCtx>
146 class ObjectReadRequest : public ObjectRequest<ImageCtxT> {
148 typedef std::vector<std::pair<uint64_t, uint64_t> > Extents;
149 typedef std::map<uint64_t, uint64_t> ExtentMap;
151 static ObjectReadRequest* create(ImageCtxT *ictx, const std::string &oid,
152 uint64_t objectno, uint64_t offset,
153 uint64_t len, Extents &buffer_extents,
154 librados::snap_t snap_id, bool sparse,
156 const ZTracer::Trace &parent_trace,
157 Context *completion) {
158 return new ObjectReadRequest(ictx, oid, objectno, offset, len,
159 buffer_extents, snap_id, sparse, op_flags,
160 parent_trace, completion);
// Constructs a read request for `len` bytes at `offset` of object `objectno`.
// create() forwards its arguments to this constructor unchanged.
//   buffer_extents - presumably kept in m_buffer_extents, which
//                    get_buffer_extents() exposes; TODO confirm
//   snap_id        - snapshot id for the read
//   sparse         - presumably selects a sparse read; TODO confirm in the .cc
//   op_flags       - operation flags; their use is not visible in this header
//   parent_trace   - parent trace for this request
//   completion     - context to signal when the read finishes
163 ObjectReadRequest(ImageCtxT *ictx, const std::string &oid,
164 uint64_t objectno, uint64_t offset, uint64_t len,
165 Extents& buffer_extents, librados::snap_t snap_id,
166 bool sparse, int op_flags,
167 const ZTracer::Trace &parent_trace, Context *completion);
169 bool should_complete(int r) override;
170 void send() override;
173 inline uint64_t get_offset() const {
174 return this->m_object_off;
176 inline uint64_t get_length() const {
177 return this->m_object_len;
179 ceph::bufferlist &data() {
182 const Extents &get_buffer_extents() const {
183 return m_buffer_extents;
185 ExtentMap &get_extent_map() {
189 const char *get_op_type() const override {
193 bool pre_object_map_update(uint8_t *new_state) override {
198 Extents m_buffer_extents;
202 ceph::bufferlist m_read_data;
206 * Reads go through the following state machine to deal with layering:
210 * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP
213 * done <------------------------------------/
216 * LIBRBD_AIO_READ_FLAT
218 * Reads start in LIBRBD_AIO_READ_GUARD or _FLAT, depending on
219 * whether there is a parent or not.
222 LIBRBD_AIO_READ_GUARD,
223 LIBRBD_AIO_READ_COPYUP,
227 read_state_d m_state;
231 void read_from_parent(Extents&& image_extents);
234 class AbstractObjectWriteRequest : public ObjectRequest<> {
// Common constructor for the write-type requests declared in this header.
// Each subclass passes its own offset/length, hide_enoent flag and trace
// name: e.g. the write request passes (data.length(), false, "write"),
// the truncate request passes (0, true, "truncate").
//   snapc - snapshot context for the operation; presumably the source of
//           m_snaps -- TODO confirm in the .cc
// The remaining parameters are presumably forwarded to the ObjectRequest
// base constructor; confirm against the definition.
236 AbstractObjectWriteRequest(ImageCtx *ictx, const std::string &oid,
237 uint64_t object_no, uint64_t object_off,
238 uint64_t len, const ::SnapContext &snapc,
239 bool hide_enoent, const char *trace_name,
240 const ZTracer::Trace &parent_trace,
241 Context *completion);
243 void add_copyup_ops(librados::ObjectWriteOperation *wr,
244 bool set_hints) override
246 add_write_ops(wr, set_hints);
249 bool should_complete(int r) override;
250 void send() override;
253 * Writes go through the following state machine to deal with
254 * layering and the object map:
260 * | ---------------------------------> LIBRBD_AIO_WRITE_PRE
264 * | . . . . > LIBRBD_AIO_WRITE_FLAT. . .
268 * v need copyup (copyup performs pre) | .
269 * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | .
274 * . \-------------------\ | /-------------------/ .
277 * . LIBRBD_AIO_WRITE_POST . .
279 * . | . . . . . . . . .
282 * . . . . . . . . . . . . . . > <finish> < . . . . . . . . . . . . . .
284 * The _PRE/_POST states are skipped if the object map is disabled.
285 * The write starts in _WRITE_GUARD or _FLAT depending on whether or not
286 * there is a parent overlap.
290 LIBRBD_AIO_WRITE_GUARD,
291 LIBRBD_AIO_WRITE_COPYUP,
292 LIBRBD_AIO_WRITE_FLAT,
293 LIBRBD_AIO_WRITE_PRE,
294 LIBRBD_AIO_WRITE_POST,
295 LIBRBD_AIO_WRITE_ERROR
298 write_state_d m_state;
299 librados::ObjectWriteOperation m_write;
301 std::vector<librados::snap_t> m_snaps;
305 virtual void add_write_ops(librados::ObjectWriteOperation *wr,
307 virtual void guard_write();
308 virtual bool post_object_map_update() {
311 virtual void send_write();
312 virtual void send_write_op();
313 virtual void handle_write_guard();
315 void send_pre_object_map_update();
318 bool send_post_object_map_update();
322 class ObjectWriteRequest : public AbstractObjectWriteRequest {
324 ObjectWriteRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
325 uint64_t object_off, const ceph::bufferlist &data,
326 const ::SnapContext &snapc, int op_flags,
327 const ZTracer::Trace &parent_trace, Context *completion)
328 : AbstractObjectWriteRequest(ictx, oid, object_no, object_off,
329 data.length(), snapc, false, "write",
330 parent_trace, completion),
331 m_write_data(data), m_op_flags(op_flags) {
334 bool is_op_payload_empty() const override {
335 return (m_write_data.length() == 0);
338 const char *get_op_type() const override {
342 bool pre_object_map_update(uint8_t *new_state) override {
343 *new_state = OBJECT_EXISTS;
348 void add_write_ops(librados::ObjectWriteOperation *wr,
349 bool set_hints) override;
351 void send_write() override;
354 ceph::bufferlist m_write_data;
358 class ObjectRemoveRequest : public AbstractObjectWriteRequest {
// Constructs a remove request for object `object_no`. Offset and length are
// both passed as 0 and hide_enoent as true; m_object_state starts out as
// OBJECT_NONEXISTENT.
// The trace name is "remove" so the trace is labelled after the operation,
// like the other request types ("write", "trim", "truncate", "zero", ...).
// It was previously misspelled "remote".
360 ObjectRemoveRequest(ImageCtx *ictx, const std::string &oid,
361 uint64_t object_no, const ::SnapContext &snapc,
362 const ZTracer::Trace &parent_trace, Context *completion)
363 : AbstractObjectWriteRequest(ictx, oid, object_no, 0, 0, snapc, true,
364 "remove", parent_trace, completion),
365 m_object_state(OBJECT_NONEXISTENT) {
368 const char* get_op_type() const override {
370 return "remove (trunc)";
375 bool pre_object_map_update(uint8_t *new_state) override {
377 m_object_state = OBJECT_EXISTS;
379 m_object_state = OBJECT_PENDING;
381 *new_state = m_object_state;
385 bool post_object_map_update() override {
386 if (m_object_state == OBJECT_EXISTS) {
392 void guard_write() override;
393 void send_write() override;
396 void add_write_ops(librados::ObjectWriteOperation *wr,
397 bool set_hints) override {
406 uint8_t m_object_state;
409 class ObjectTrimRequest : public AbstractObjectWriteRequest {
411 // The caller decides whether a post object map update is needed
412 // and passes that choice in. The pre update is decided as usual
413 // (by checking the state of the object in the map).
414 ObjectTrimRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
415 const ::SnapContext &snapc, bool post_object_map_update,
417 : AbstractObjectWriteRequest(ictx, oid, object_no, 0, 0, snapc, true,
418 "trim", {}, completion),
419 m_post_object_map_update(post_object_map_update) {
422 const char* get_op_type() const override {
423 return "remove (trim)";
426 bool pre_object_map_update(uint8_t *new_state) override {
427 *new_state = OBJECT_PENDING;
431 bool post_object_map_update() override {
432 return m_post_object_map_update;
436 void add_write_ops(librados::ObjectWriteOperation *wr,
437 bool set_hints) override {
442 bool m_post_object_map_update;
445 class ObjectTruncateRequest : public AbstractObjectWriteRequest {
447 ObjectTruncateRequest(ImageCtx *ictx, const std::string &oid,
448 uint64_t object_no, uint64_t object_off,
449 const ::SnapContext &snapc,
450 const ZTracer::Trace &parent_trace, Context *completion)
451 : AbstractObjectWriteRequest(ictx, oid, object_no, object_off, 0, snapc,
452 true, "truncate", parent_trace, completion) {
455 const char* get_op_type() const override {
459 bool pre_object_map_update(uint8_t *new_state) override {
460 if (!m_object_exist && !has_parent())
461 *new_state = OBJECT_NONEXISTENT;
463 *new_state = OBJECT_EXISTS;
467 void send_write() override;
470 void add_write_ops(librados::ObjectWriteOperation *wr,
471 bool set_hints) override {
472 wr->truncate(m_object_off);
476 class ObjectZeroRequest : public AbstractObjectWriteRequest {
478 ObjectZeroRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
479 uint64_t object_off, uint64_t object_len,
480 const ::SnapContext &snapc,
481 const ZTracer::Trace &parent_trace, Context *completion)
482 : AbstractObjectWriteRequest(ictx, oid, object_no, object_off, object_len,
483 snapc, true, "zero", parent_trace,
487 const char* get_op_type() const override {
491 bool pre_object_map_update(uint8_t *new_state) override {
492 *new_state = OBJECT_EXISTS;
496 void send_write() override;
499 void add_write_ops(librados::ObjectWriteOperation *wr,
500 bool set_hints) override {
501 wr->zero(m_object_off, m_object_len);
505 class ObjectWriteSameRequest : public AbstractObjectWriteRequest {
507 ObjectWriteSameRequest(ImageCtx *ictx, const std::string &oid,
508 uint64_t object_no, uint64_t object_off,
509 uint64_t object_len, const ceph::bufferlist &data,
510 const ::SnapContext &snapc, int op_flags,
511 const ZTracer::Trace &parent_trace,
513 : AbstractObjectWriteRequest(ictx, oid, object_no, object_off,
514 object_len, snapc, false, "writesame",
515 parent_trace, completion),
516 m_write_data(data), m_op_flags(op_flags) {
519 const char *get_op_type() const override {
523 bool pre_object_map_update(uint8_t *new_state) override {
524 *new_state = OBJECT_EXISTS;
529 void add_write_ops(librados::ObjectWriteOperation *wr,
530 bool set_hints) override;
532 void send_write() override;
535 ceph::bufferlist m_write_data;
539 class ObjectCompareAndWriteRequest : public AbstractObjectWriteRequest {
541 typedef std::vector<std::pair<uint64_t, uint64_t> > Extents;
543 ObjectCompareAndWriteRequest(ImageCtx *ictx, const std::string &oid,
544 uint64_t object_no, uint64_t object_off,
545 const ceph::bufferlist &cmp_bl,
546 const ceph::bufferlist &write_bl,
547 const ::SnapContext &snapc,
548 uint64_t *mismatch_offset, int op_flags,
549 const ZTracer::Trace &parent_trace,
551 : AbstractObjectWriteRequest(ictx, oid, object_no, object_off,
552 cmp_bl.length(), snapc, false, "compare_and_write",
553 parent_trace, completion),
554 m_cmp_bl(cmp_bl), m_write_bl(write_bl),
555 m_mismatch_offset(mismatch_offset), m_op_flags(op_flags) {
558 const char *get_op_type() const override {
559 return "compare_and_write";
562 bool pre_object_map_update(uint8_t *new_state) override {
563 *new_state = OBJECT_EXISTS;
567 void complete(int r) override;
569 void add_write_ops(librados::ObjectWriteOperation *wr,
570 bool set_hints) override;
572 void send_write() override;
575 ceph::bufferlist m_cmp_bl;
576 ceph::bufferlist m_write_bl;
577 uint64_t *m_mismatch_offset;
582 } // namespace librbd
584 extern template class librbd::io::ObjectRequest<librbd::ImageCtx>;
585 extern template class librbd::io::ObjectReadRequest<librbd::ImageCtx>;
587 #endif // CEPH_LIBRBD_IO_OBJECT_REQUEST_H