Fix some bugs found when testing the OpenSDS Ansible installer
[stor4nfv.git] / src / ceph / src / librbd / io / ImageRequest.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "librbd/io/ImageRequest.h"
5 #include "librbd/ImageCtx.h"
6 #include "librbd/internal.h"
7 #include "librbd/Journal.h"
8 #include "librbd/Utils.h"
9 #include "librbd/cache/ImageCache.h"
10 #include "librbd/io/AioCompletion.h"
11 #include "librbd/io/ObjectRequest.h"
12 #include "librbd/journal/Types.h"
13 #include "include/rados/librados.hpp"
14 #include "common/WorkQueue.h"
15 #include "osdc/Striper.h"
16
17 #define dout_subsys ceph_subsys_rbd
18 #undef dout_prefix
19 #define dout_prefix *_dout << "librbd::io::ImageRequest: " << this \
20                            << " " << __func__ << ": "
21
22 namespace librbd {
23 namespace io {
24
25 using util::get_image_ctx;
26
27 namespace {
28
29 template <typename ImageCtxT = ImageCtx>
30 struct C_DiscardJournalCommit : public Context {
31   typedef std::vector<ObjectExtent> ObjectExtents;
32
33   ImageCtxT &image_ctx;
34   AioCompletion *aio_comp;
35   ObjectExtents object_extents;
36
37   C_DiscardJournalCommit(ImageCtxT &_image_ctx, AioCompletion *_aio_comp,
38                          const ObjectExtents &_object_extents, uint64_t tid)
39     : image_ctx(_image_ctx), aio_comp(_aio_comp),
40       object_extents(_object_extents) {
41     CephContext *cct = image_ctx.cct;
42     ldout(cct, 20) << "delaying cache discard until journal tid " << tid << " "
43                    << "safe" << dendl;
44
45     aio_comp->add_request();
46   }
47
48   void finish(int r) override {
49     CephContext *cct = image_ctx.cct;
50     ldout(cct, 20) << "C_DiscardJournalCommit: "
51                    << "journal committed: discarding from cache" << dendl;
52
53     Mutex::Locker cache_locker(image_ctx.cache_lock);
54     image_ctx.object_cacher->discard_set(image_ctx.object_set, object_extents);
55     aio_comp->complete_request(r);
56   }
57 };
58
59 template <typename ImageCtxT = ImageCtx>
60 struct C_FlushJournalCommit : public Context {
61   ImageCtxT &image_ctx;
62   AioCompletion *aio_comp;
63
64   C_FlushJournalCommit(ImageCtxT &_image_ctx, AioCompletion *_aio_comp,
65                        uint64_t tid)
66     : image_ctx(_image_ctx), aio_comp(_aio_comp) {
67     CephContext *cct = image_ctx.cct;
68     ldout(cct, 20) << "delaying flush until journal tid " << tid << " "
69                    << "safe" << dendl;
70
71     aio_comp->add_request();
72   }
73
74   void finish(int r) override {
75     CephContext *cct = image_ctx.cct;
76     ldout(cct, 20) << "C_FlushJournalCommit: journal committed" << dendl;
77     aio_comp->complete_request(r);
78   }
79 };
80
// Completion wrapper for reads serviced through the object cacher.  The
// first complete() invocation re-queues this context on the op work queue
// (per the inline note, to avoid a cache_lock ordering issue); the second,
// queued invocation then finishes the wrapped ObjectReadRequest.
template <typename ImageCtxT>
class C_ObjectCacheRead : public Context {
public:
  explicit C_ObjectCacheRead(ImageCtxT &ictx, ObjectReadRequest<ImageCtxT> *req)
    : m_image_ctx(ictx), m_req(req), m_enqueued(false) {}

  void complete(int r) override {
    if (!m_enqueued) {
      // cache_lock creates a lock ordering issue -- so re-execute this context
      // outside the cache_lock
      m_enqueued = true;
      m_image_ctx.op_work_queue->queue(this, r);
      return;
    }
    Context::complete(r);
  }

protected:
  void finish(int r) override {
    // forward the read result to the wrapped object request
    m_req->complete(r);
  }

private:
  ImageCtxT &m_image_ctx;               // provides the op work queue
  ObjectReadRequest<ImageCtxT> *m_req;  // request to complete on finish
  bool m_enqueued;                      // true once re-queued off cache_lock
};
108
109 } // anonymous namespace
110
111 template <typename I>
112 ImageRequest<I>* ImageRequest<I>::create_read_request(
113     I &image_ctx, AioCompletion *aio_comp, Extents &&image_extents,
114     ReadResult &&read_result, int op_flags,
115     const ZTracer::Trace &parent_trace) {
116   return new ImageReadRequest<I>(image_ctx, aio_comp,
117                                  std::move(image_extents),
118                                  std::move(read_result), op_flags,
119                                  parent_trace);
120 }
121
122 template <typename I>
123 ImageRequest<I>* ImageRequest<I>::create_write_request(
124     I &image_ctx, AioCompletion *aio_comp, Extents &&image_extents,
125     bufferlist &&bl, int op_flags, const ZTracer::Trace &parent_trace) {
126   return new ImageWriteRequest<I>(image_ctx, aio_comp, std::move(image_extents),
127                                   std::move(bl), op_flags, parent_trace);
128 }
129
130 template <typename I>
131 ImageRequest<I>* ImageRequest<I>::create_discard_request(
132     I &image_ctx, AioCompletion *aio_comp, uint64_t off, uint64_t len,
133     bool skip_partial_discard, const ZTracer::Trace &parent_trace) {
134   return new ImageDiscardRequest<I>(image_ctx, aio_comp, off, len,
135                                     skip_partial_discard, parent_trace);
136 }
137
138 template <typename I>
139 ImageRequest<I>* ImageRequest<I>::create_flush_request(
140     I &image_ctx, AioCompletion *aio_comp,
141     const ZTracer::Trace &parent_trace) {
142   return new ImageFlushRequest<I>(image_ctx, aio_comp, parent_trace);
143 }
144
145 template <typename I>
146 ImageRequest<I>* ImageRequest<I>::create_writesame_request(
147     I &image_ctx, AioCompletion *aio_comp, uint64_t off, uint64_t len,
148     bufferlist &&bl, int op_flags, const ZTracer::Trace &parent_trace) {
149   return new ImageWriteSameRequest<I>(image_ctx, aio_comp, off, len,
150                                       std::move(bl), op_flags, parent_trace);
151 }
152
153 template <typename I>
154 ImageRequest<I>* ImageRequest<I>::create_compare_and_write_request(
155     I &image_ctx, AioCompletion *c, Extents &&image_extents,
156     bufferlist &&cmp_bl, bufferlist &&bl, uint64_t *mismatch_offset,
157     int op_flags, const ZTracer::Trace &parent_trace) {
158   return new ImageCompareAndWriteRequest<I>(image_ctx, c,
159                                             std::move(image_extents),
160                                             std::move(cmp_bl),
161                                             std::move(bl), mismatch_offset,
162                                             op_flags, parent_trace);
163 }
164
// Convenience entry point: construct a stack-based read request and send it.
template <typename I>
void ImageRequest<I>::aio_read(I *ictx, AioCompletion *c,
                               Extents &&image_extents,
                               ReadResult &&read_result, int op_flags,
                               const ZTracer::Trace &parent_trace) {
  ImageReadRequest<I> req(*ictx, c, std::move(image_extents),
                          std::move(read_result), op_flags, parent_trace);
  req.send();
}
174
// Convenience entry point: construct a stack-based write request and send it.
template <typename I>
void ImageRequest<I>::aio_write(I *ictx, AioCompletion *c,
                                Extents &&image_extents, bufferlist &&bl,
                                int op_flags,
                                const ZTracer::Trace &parent_trace) {
  ImageWriteRequest<I> req(*ictx, c, std::move(image_extents), std::move(bl),
                           op_flags, parent_trace);
  req.send();
}
184
// Convenience entry point: construct a stack-based discard request and send it.
template <typename I>
void ImageRequest<I>::aio_discard(I *ictx, AioCompletion *c,
                                  uint64_t off, uint64_t len,
                                  bool skip_partial_discard,
                                  const ZTracer::Trace &parent_trace) {
  ImageDiscardRequest<I> req(*ictx, c, off, len, skip_partial_discard,
                             parent_trace);
  req.send();
}
194
// Convenience entry point: construct a stack-based flush request and send it.
template <typename I>
void ImageRequest<I>::aio_flush(I *ictx, AioCompletion *c,
                                const ZTracer::Trace &parent_trace) {
  ImageFlushRequest<I> req(*ictx, c, parent_trace);
  req.send();
}
201
// Convenience entry point: construct a stack-based write-same request and
// send it.
template <typename I>
void ImageRequest<I>::aio_writesame(I *ictx, AioCompletion *c,
                                    uint64_t off, uint64_t len,
                                    bufferlist &&bl, int op_flags,
                                    const ZTracer::Trace &parent_trace) {
  ImageWriteSameRequest<I> req(*ictx, c, off, len, std::move(bl), op_flags,
                               parent_trace);
  req.send();
}
211
// Convenience entry point: construct a stack-based compare-and-write request
// and send it.
template <typename I>
void ImageRequest<I>::aio_compare_and_write(I *ictx, AioCompletion *c,
                                            Extents &&image_extents,
                                            bufferlist &&cmp_bl,
                                            bufferlist &&bl,
                                            uint64_t *mismatch_offset,
                                            int op_flags,
                                            const ZTracer::Trace &parent_trace) {
  ImageCompareAndWriteRequest<I> req(*ictx, c, std::move(image_extents),
                                     std::move(cmp_bl), std::move(bl),
                                     mismatch_offset, op_flags, parent_trace);
  req.send();
}
225
226
227 template <typename I>
228 void ImageRequest<I>::send() {
229   I &image_ctx = this->m_image_ctx;
230   assert(m_aio_comp->is_initialized(get_aio_type()));
231   assert(m_aio_comp->is_started() ^ (get_aio_type() == AIO_TYPE_FLUSH));
232
233   CephContext *cct = image_ctx.cct;
234   AioCompletion *aio_comp = this->m_aio_comp;
235   ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", "
236                  << "completion=" << aio_comp << dendl;
237
238   aio_comp->get();
239   int r = clip_request();
240   if (r < 0) {
241     m_aio_comp->fail(r);
242     return;
243   }
244
245   if (m_bypass_image_cache || m_image_ctx.image_cache == nullptr) {
246     send_request();
247   } else {
248     send_image_cache_request();
249   }
250 }
251
252 template <typename I>
253 int ImageRequest<I>::clip_request() {
254   RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
255   for (auto &image_extent : m_image_extents) {
256     auto clip_len = image_extent.second;
257     int r = clip_io(get_image_ctx(&m_image_ctx), image_extent.first, &clip_len);
258     if (r < 0) {
259       return r;
260     }
261
262     image_extent.second = clip_len;
263   }
264   return 0;
265 }
266
// Forward the start-op notification to the owning AioCompletion.
template <typename I>
void ImageRequest<I>::start_op() {
  m_aio_comp->start_op();
}
271
272 template <typename I>
273 void ImageRequest<I>::fail(int r) {
274   AioCompletion *aio_comp = this->m_aio_comp;
275   aio_comp->get();
276   aio_comp->fail(r);
277 }
278
// Construct a read request; the supplied ReadResult is moved into the
// completion.
template <typename I>
ImageReadRequest<I>::ImageReadRequest(I &image_ctx, AioCompletion *aio_comp,
                                      Extents &&image_extents,
                                      ReadResult &&read_result, int op_flags,
                                      const ZTracer::Trace &parent_trace)
  : ImageRequest<I>(image_ctx, aio_comp, std::move(image_extents), "read",
                    parent_trace),
    m_op_flags(op_flags) {
  aio_comp->read_result = std::move(read_result);
}
289
290 template <typename I>
291 int ImageReadRequest<I>::clip_request() {
292   int r = ImageRequest<I>::clip_request();
293   if (r < 0) {
294     return r;
295   }
296
297   uint64_t buffer_length = 0;
298   auto &image_extents = this->m_image_extents;
299   for (auto &image_extent : image_extents) {
300     buffer_length += image_extent.second;
301   }
302   this->m_aio_comp->read_result.set_clip_length(buffer_length);
303   return 0;
304 }
305
// Issue the read directly against the objects: optionally trigger
// readahead, map the image extents to per-object extents, then dispatch one
// ObjectReadRequest per object extent (through the object cacher when it is
// enabled).
template <typename I>
void ImageReadRequest<I>::send_request() {
  I &image_ctx = this->m_image_ctx;
  CephContext *cct = image_ctx.cct;

  // readahead only makes sense for sequential access patterns and when the
  // object cacher is present to hold the prefetched data
  auto &image_extents = this->m_image_extents;
  if (image_ctx.object_cacher && image_ctx.readahead_max_bytes > 0 &&
      !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) {
    readahead(get_image_ctx(&image_ctx), image_extents);
  }

  AioCompletion *aio_comp = this->m_aio_comp;
  librados::snap_t snap_id;
  map<object_t,vector<ObjectExtent> > object_extents;
  uint64_t buffer_ofs = 0;
  {
    // prevent image size from changing between computing clip and recording
    // pending async operation
    RWLock::RLocker snap_locker(image_ctx.snap_lock);
    snap_id = image_ctx.snap_id;

    // map image extents to object extents
    for (auto &extent : image_extents) {
      if (extent.second == 0) {
        continue;
      }

      Striper::file_to_extents(cct, image_ctx.format_string, &image_ctx.layout,
                               extent.first, extent.second, 0, object_extents,
                               buffer_ofs);
      buffer_ofs += extent.second;
    }
  }

  // pre-calculate the expected number of read requests
  uint32_t request_count = 0;
  for (auto &object_extent : object_extents) {
    request_count += object_extent.second.size();
  }
  aio_comp->set_request_count(request_count);

  // issue the requests
  for (auto &object_extent : object_extents) {
    for (auto &extent : object_extent.second) {
      ldout(cct, 20) << "oid " << extent.oid << " " << extent.offset << "~"
                     << extent.length << " from " << extent.buffer_extents
                     << dendl;

      auto req_comp = new io::ReadResult::C_SparseReadRequest<I>(
        aio_comp);
      ObjectReadRequest<I> *req = ObjectReadRequest<I>::create(
        &image_ctx, extent.oid.name, extent.objectno, extent.offset,
        extent.length, extent.buffer_extents, snap_id, true, m_op_flags,
        this->m_trace, req_comp);
      req_comp->request = req;

      if (image_ctx.object_cacher) {
        // route through the cacher; C_ObjectCacheRead completes req outside
        // cache_lock (see class comment above)
        C_ObjectCacheRead<I> *cache_comp = new C_ObjectCacheRead<I>(image_ctx,
                                                                    req);
        image_ctx.aio_read_from_cache(
          extent.oid, extent.objectno, &req->data(), extent.length,
          extent.offset, cache_comp, m_op_flags,
          (this->m_trace.valid() ? &this->m_trace : nullptr));
      } else {
        req->send();
      }
    }
  }

  // drop the reference taken in ImageRequest<I>::send()
  aio_comp->put();

  // buffer_ofs now equals the total bytes read
  image_ctx.perfcounter->inc(l_librbd_rd);
  image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs);
}
380
// Service the read through the image cache layer as a single request.
template <typename I>
void ImageReadRequest<I>::send_image_cache_request() {
  I &image_ctx = this->m_image_ctx;
  assert(image_ctx.image_cache != nullptr);

  AioCompletion *aio_comp = this->m_aio_comp;
  aio_comp->set_request_count(1);

  // NOTE(review): req_comp is constructed before m_image_extents is moved
  // into the cache call below -- assumes C_ImageReadRequest keeps its own
  // copy of the extents; confirm against ReadResult
  auto *req_comp = new io::ReadResult::C_ImageReadRequest(
    aio_comp, this->m_image_extents);
  image_ctx.image_cache->aio_read(std::move(this->m_image_extents),
                                  &req_comp->bl, m_op_flags,
                                  req_comp);
}
395
// Common write-path dispatch for write/discard/writesame/compare-and-write:
// reject writes to snapshots/read-only images, map image extents to object
// extents, optionally append a journal event, and hand the object extents
// to the subclass hooks (create/send object requests, cache requests).
template <typename I>
void AbstractImageWriteRequest<I>::send_request() {
  I &image_ctx = this->m_image_ctx;
  CephContext *cct = image_ctx.cct;

  RWLock::RLocker md_locker(image_ctx.md_lock);

  bool journaling = false;

  AioCompletion *aio_comp = this->m_aio_comp;
  uint64_t clip_len = 0;
  ObjectExtents object_extents;
  ::SnapContext snapc;
  {
    // prevent image size from changing between computing clip and recording
    // pending async operation
    RWLock::RLocker snap_locker(image_ctx.snap_lock);
    if (image_ctx.snap_id != CEPH_NOSNAP || image_ctx.read_only) {
      // writes are only permitted against the writable head image
      aio_comp->fail(-EROFS);
      return;
    }

    for (auto &extent : this->m_image_extents) {
      if (extent.second == 0) {
        continue;
      }

      // map to object extents
      Striper::file_to_extents(cct, image_ctx.format_string, &image_ctx.layout,
                               extent.first, extent.second, 0, object_extents);
      clip_len += extent.second;
    }

    // capture snapshot context and journaling state under the same lock
    snapc = image_ctx.snapc;
    journaling = (image_ctx.journal != nullptr &&
                  image_ctx.journal->is_journal_appending());
  }

  // subclass hook (e.g. discard drops partial-object extents)
  int ret = prune_object_extents(object_extents);
  if (ret < 0) {
    aio_comp->fail(ret);
    return;
  }

  if (!object_extents.empty()) {
    uint64_t journal_tid = 0;
    aio_comp->set_request_count(
      object_extents.size() + get_object_cache_request_count(journaling));

    // when journaling, object requests are stashed and attached to the
    // journal event instead of being sent immediately
    ObjectRequests requests;
    send_object_requests(object_extents, snapc,
                         (journaling ? &requests : nullptr));

    if (journaling) {
      // in-flight ops are flushed prior to closing the journal
      assert(image_ctx.journal != NULL);
      journal_tid = append_journal_event(requests, m_synchronous);
    }

    if (image_ctx.object_cacher != NULL) {
      send_object_cache_requests(object_extents, journal_tid);
    }
  } else {
    // no IO to perform -- fire completion
    aio_comp->unblock();
  }

  update_stats(clip_len);
  // drop the reference taken in ImageRequest<I>::send()
  aio_comp->put();
}
466
467 template <typename I>
468 void AbstractImageWriteRequest<I>::send_object_requests(
469     const ObjectExtents &object_extents, const ::SnapContext &snapc,
470     ObjectRequests *object_requests) {
471   I &image_ctx = this->m_image_ctx;
472   CephContext *cct = image_ctx.cct;
473
474   AioCompletion *aio_comp = this->m_aio_comp;
475   for (ObjectExtents::const_iterator p = object_extents.begin();
476        p != object_extents.end(); ++p) {
477     ldout(cct, 20) << "oid " << p->oid << " " << p->offset << "~" << p->length
478                    << " from " << p->buffer_extents << dendl;
479     C_AioRequest *req_comp = new C_AioRequest(aio_comp);
480     ObjectRequestHandle *request = create_object_request(*p, snapc,
481                                                             req_comp);
482
483     // if journaling, stash the request for later; otherwise send
484     if (request != NULL) {
485       if (object_requests != NULL) {
486         object_requests->push_back(request);
487       } else {
488         request->send();
489       }
490     }
491   }
492 }
493
494 template <typename I>
495 void ImageWriteRequest<I>::assemble_extent(const ObjectExtent &object_extent,
496                                            bufferlist *bl) {
497   for (auto q = object_extent.buffer_extents.begin();
498        q != object_extent.buffer_extents.end(); ++q) {
499     bufferlist sub_bl;
500     sub_bl.substr_of(m_bl, q->first, q->second);
501     bl->claim_append(sub_bl);
502   }
503 }
504
// Append one journal write event per image extent, slicing the payload to
// match each extent.  Returns the tid of the LAST appended event, which is
// the one associated with the completion.
template <typename I>
uint64_t ImageWriteRequest<I>::append_journal_event(
    const ObjectRequests &requests, bool synchronous) {
  I &image_ctx = this->m_image_ctx;

  uint64_t tid = 0;
  uint64_t buffer_offset = 0;
  assert(!this->m_image_extents.empty());
  for (auto &extent : this->m_image_extents) {
    // slice out the payload bytes belonging to this extent
    bufferlist sub_bl;
    sub_bl.substr_of(m_bl, buffer_offset, extent.second);
    buffer_offset += extent.second;

    tid = image_ctx.journal->append_write_event(extent.first, extent.second,
                                                sub_bl, requests, synchronous);
  }

  // NOTE(review): with the object cacher enabled the journal event appears
  // to be tracked via the cache writeback path instead (journal_tid is
  // passed to send_object_cache_requests) -- confirm
  if (image_ctx.object_cacher == NULL) {
    AioCompletion *aio_comp = this->m_aio_comp;
    aio_comp->associate_journal_event(tid);
  }
  return tid;
}
528
// Service the write through the image cache as a single request; both the
// extents and the payload are moved into the cache call.
template <typename I>
void ImageWriteRequest<I>::send_image_cache_request() {
  I &image_ctx = this->m_image_ctx;
  assert(image_ctx.image_cache != nullptr);

  AioCompletion *aio_comp = this->m_aio_comp;
  aio_comp->set_request_count(1);
  C_AioRequest *req_comp = new C_AioRequest(aio_comp);
  image_ctx.image_cache->aio_write(std::move(this->m_image_extents),
                                   std::move(m_bl), m_op_flags, req_comp);
}
540
541 template <typename I>
542 void ImageWriteRequest<I>::send_object_cache_requests(
543     const ObjectExtents &object_extents, uint64_t journal_tid) {
544   I &image_ctx = this->m_image_ctx;
545   for (auto p = object_extents.begin(); p != object_extents.end(); ++p) {
546     const ObjectExtent &object_extent = *p;
547
548     bufferlist bl;
549     assemble_extent(object_extent, &bl);
550
551     AioCompletion *aio_comp = this->m_aio_comp;
552     C_AioRequest *req_comp = new C_AioRequest(aio_comp);
553     image_ctx.write_to_cache(
554       object_extent.oid, bl, object_extent.length, object_extent.offset,
555       req_comp, m_op_flags, journal_tid,
556       (this->m_trace.valid() ? &this->m_trace : nullptr));
557   }
558 }
559
560 template <typename I>
561 void ImageWriteRequest<I>::send_object_requests(
562     const ObjectExtents &object_extents, const ::SnapContext &snapc,
563     ObjectRequests *object_requests) {
564   I &image_ctx = this->m_image_ctx;
565
566   // cache handles creating object requests during writeback
567   if (image_ctx.object_cacher == NULL) {
568     AbstractImageWriteRequest<I>::send_object_requests(object_extents, snapc,
569                                                        object_requests);
570   }
571 }
572
573 template <typename I>
574 ObjectRequestHandle *ImageWriteRequest<I>::create_object_request(
575     const ObjectExtent &object_extent, const ::SnapContext &snapc,
576     Context *on_finish) {
577   I &image_ctx = this->m_image_ctx;
578   assert(image_ctx.object_cacher == NULL);
579
580   bufferlist bl;
581   assemble_extent(object_extent, &bl);
582   ObjectRequest<I> *req = ObjectRequest<I>::create_write(
583     &image_ctx, object_extent.oid.name, object_extent.objectno,
584     object_extent.offset, bl, snapc, m_op_flags, this->m_trace, on_finish);
585   return req;
586 }
587
// Record the write op and its clipped byte length in the perf counters.
template <typename I>
void ImageWriteRequest<I>::update_stats(size_t length) {
  I &image_ctx = this->m_image_ctx;
  image_ctx.perfcounter->inc(l_librbd_wr);
  image_ctx.perfcounter->inc(l_librbd_wr_bytes, length);
}
594
// Append one journal discard event per image extent; the tid of the LAST
// event is associated with the completion and returned.  Unlike the write
// and writesame variants, the association is unconditional here.
template <typename I>
uint64_t ImageDiscardRequest<I>::append_journal_event(
    const ObjectRequests &requests, bool synchronous) {
  I &image_ctx = this->m_image_ctx;

  uint64_t tid = 0;
  assert(!this->m_image_extents.empty());
  for (auto &extent : this->m_image_extents) {
    journal::EventEntry event_entry(journal::AioDiscardEvent(extent.first,
                                                             extent.second,
                                                             this->m_skip_partial_discard));
    tid = image_ctx.journal->append_io_event(std::move(event_entry),
                                             requests, extent.first,
                                             extent.second, synchronous);
  }

  AioCompletion *aio_comp = this->m_aio_comp;
  aio_comp->associate_journal_event(tid);
  return tid;
}
615
// When skip_partial_discard is enabled, drop every object extent that ends
// before the end of its backing object (i.e. would only partially discard
// the object); extents reaching the object boundary are kept.  Always
// returns 0.
template <typename I>
int ImageDiscardRequest<I>::prune_object_extents(ObjectExtents &object_extents) {
  I &image_ctx = this->m_image_ctx;
  CephContext *cct = image_ctx.cct;
  if (!this->m_skip_partial_discard) {
    return 0;
  }

  for (auto p = object_extents.begin(); p != object_extents.end(); ) {
    if (p->offset + p->length < image_ctx.layout.object_size) {
      ldout(cct, 20) << "oid " << p->oid << " " << p->offset << "~"
                     << p->length << " from " << p->buffer_extents
                     << ": skip partial discard" << dendl;
      // erase() returns the next valid iterator
      p = object_extents.erase(p);
    } else {
      ++p;
    }
  }

  return 0;
}
637
638 template <typename I>
639 uint32_t ImageDiscardRequest<I>::get_object_cache_request_count(bool journaling) const {
640   // extra completion request is required for tracking journal commit
641   I &image_ctx = this->m_image_ctx;
642   return (image_ctx.object_cacher != nullptr && journaling ? 1 : 0);
643 }
644
645 template <typename I>
646 void ImageDiscardRequest<I>::send_image_cache_request() {
647   I &image_ctx = this->m_image_ctx;
648   assert(image_ctx.image_cache != nullptr);
649
650   AioCompletion *aio_comp = this->m_aio_comp;
651   aio_comp->set_request_count(this->m_image_extents.size());
652   for (auto &extent : this->m_image_extents) {
653     C_AioRequest *req_comp = new C_AioRequest(aio_comp);
654     image_ctx.image_cache->aio_discard(extent.first, extent.second,
655                                        this->m_skip_partial_discard, req_comp);
656   }
657 }
658
// Invalidate the discarded extents in the object cacher.  Without a journal
// event (tid == 0) the cache is discarded immediately; otherwise the
// discard is deferred until the journal commits (C_DiscardJournalCommit).
template <typename I>
void ImageDiscardRequest<I>::send_object_cache_requests(
    const ObjectExtents &object_extents, uint64_t journal_tid) {
  I &image_ctx = this->m_image_ctx;
  if (journal_tid == 0) {
    Mutex::Locker cache_locker(image_ctx.cache_lock);
    image_ctx.object_cacher->discard_set(image_ctx.object_set,
                                         object_extents);
  } else {
    // cannot discard from cache until journal has committed
    assert(image_ctx.journal != NULL);
    AioCompletion *aio_comp = this->m_aio_comp;
    image_ctx.journal->wait_event(
      journal_tid, new C_DiscardJournalCommit<I>(image_ctx, aio_comp,
                                                 object_extents, journal_tid));
  }
}
676
677 template <typename I>
678 ObjectRequestHandle *ImageDiscardRequest<I>::create_object_request(
679     const ObjectExtent &object_extent, const ::SnapContext &snapc,
680     Context *on_finish) {
681   I &image_ctx = this->m_image_ctx;
682
683   ObjectRequest<I> *req;
684   if (object_extent.length == image_ctx.layout.object_size) {
685     req = ObjectRequest<I>::create_remove(
686       &image_ctx, object_extent.oid.name, object_extent.objectno, snapc,
687       this->m_trace, on_finish);
688   } else if (object_extent.offset + object_extent.length ==
689                image_ctx.layout.object_size) {
690     req = ObjectRequest<I>::create_truncate(
691       &image_ctx, object_extent.oid.name, object_extent.objectno,
692       object_extent.offset, snapc, this->m_trace, on_finish);
693   } else {
694     req = ObjectRequest<I>::create_zero(
695       &image_ctx, object_extent.oid.name, object_extent.objectno,
696       object_extent.offset, object_extent.length, snapc,
697       this->m_trace, on_finish);
698   }
699   return req;
700 }
701
// Record the discard op and its clipped byte length in the perf counters.
template <typename I>
void ImageDiscardRequest<I>::update_stats(size_t length) {
  I &image_ctx = this->m_image_ctx;
  image_ctx.perfcounter->inc(l_librbd_discard);
  image_ctx.perfcounter->inc(l_librbd_discard_bytes, length);
}
708
// Flush path: when journaling, append a flush event and complete only after
// in-flight async operations have drained and the journal event is flushed;
// otherwise flush through ImageCtx directly.
template <typename I>
void ImageFlushRequest<I>::send_request() {
  I &image_ctx = this->m_image_ctx;
  image_ctx.user_flushed();

  bool journaling = false;
  {
    RWLock::RLocker snap_locker(image_ctx.snap_lock);
    journaling = (image_ctx.journal != nullptr &&
                  image_ctx.journal->is_journal_appending());
  }

  AioCompletion *aio_comp = this->m_aio_comp;
  if (journaling) {
    // in-flight ops are flushed prior to closing the journal
    uint64_t journal_tid = image_ctx.journal->append_io_event(
      journal::EventEntry(journal::AioFlushEvent()),
      ObjectRequests(), 0, 0, false);

    aio_comp->set_request_count(1);
    aio_comp->associate_journal_event(journal_tid);

    // NOTE(review): the lambda captures image_ctx by reference -- relies on
    // the image remaining open until the async-operation flush fires;
    // confirm against shutdown ordering
    FunctionContext *flush_ctx = new FunctionContext(
      [aio_comp, &image_ctx, journal_tid] (int r) {
        auto ctx = new C_FlushJournalCommit<I>(image_ctx, aio_comp,
                                               journal_tid);
        image_ctx.journal->flush_event(journal_tid, ctx);

        // track flush op for block writes
        aio_comp->start_op(true);
        aio_comp->put();
    });

    image_ctx.flush_async_operations(flush_ctx);
  } else {
    // flush rbd cache only when journaling is not enabled
    aio_comp->set_request_count(1);
    C_AioRequest *req_comp = new C_AioRequest(aio_comp);
    image_ctx.flush(req_comp);

    aio_comp->start_op(true);
    aio_comp->put();
  }

  image_ctx.perfcounter->inc(l_librbd_aio_flush);
}
755
// Service the flush through the image cache as a single request.
template <typename I>
void ImageFlushRequest<I>::send_image_cache_request() {
  I &image_ctx = this->m_image_ctx;
  assert(image_ctx.image_cache != nullptr);

  AioCompletion *aio_comp = this->m_aio_comp;
  aio_comp->set_request_count(1);
  C_AioRequest *req_comp = new C_AioRequest(aio_comp);
  image_ctx.image_cache->aio_flush(req_comp);
}
766
// Build the payload for one object extent of a write-same.  Returns true
// when every buffer extent is aligned to the pattern length (unless
// force_write) -- in that case *bl receives a single copy of the pattern
// for an object-level writesame op.  Otherwise the pattern is expanded
// (repeated/sliced) to cover the extent byte-for-byte and false is
// returned, meaning a plain write must be issued.
template <typename I>
bool ImageWriteSameRequest<I>::assemble_writesame_extent(const ObjectExtent &object_extent,
                                                         bufferlist *bl, bool force_write) {
  size_t m_data_len = m_data_bl.length();

  if (!force_write) {
    // writesame is only usable when every buffer extent starts and ends on
    // a pattern boundary
    bool may_writesame = true;

    for (auto q = object_extent.buffer_extents.begin();
         q != object_extent.buffer_extents.end(); ++q) {
      if (!(q->first % m_data_len == 0 && q->second % m_data_len == 0)) {
        may_writesame = false;
        break;
      }
    }

    if (may_writesame) {
      bl->append(m_data_bl);
      return true;
    }
  }

  // expand the pattern across each buffer extent, honoring the phase of the
  // pattern at the extent's starting offset
  for (auto q = object_extent.buffer_extents.begin();
       q != object_extent.buffer_extents.end(); ++q) {
    bufferlist sub_bl;
    uint64_t sub_off = q->first % m_data_len;   // phase within the pattern
    uint64_t sub_len = m_data_len - sub_off;    // bytes to pattern boundary
    uint64_t extent_left = q->second;
    while (extent_left >= sub_len) {
      sub_bl.substr_of(m_data_bl, sub_off, sub_len);
      bl->claim_append(sub_bl);
      extent_left -= sub_len;
      if (sub_off) {
        // after the first (possibly partial) chunk, copy whole patterns
        sub_off = 0;
        sub_len = m_data_len;
      }
    }
    if (extent_left) {
      // trailing partial pattern
      sub_bl.substr_of(m_data_bl, sub_off, extent_left);
      bl->claim_append(sub_bl);
    }
  }
  return false;
}
811
// Append one journal writesame event per image extent.  Returns the tid of
// the LAST event; the completion is only associated with it when writing
// directly (no object cacher), matching the write path.
template <typename I>
uint64_t ImageWriteSameRequest<I>::append_journal_event(
    const ObjectRequests &requests, bool synchronous) {
  I &image_ctx = this->m_image_ctx;

  uint64_t tid = 0;
  assert(!this->m_image_extents.empty());
  for (auto &extent : this->m_image_extents) {
    journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first,
                                                               extent.second,
                                                               m_data_bl));
    tid = image_ctx.journal->append_io_event(std::move(event_entry),
                                             requests, extent.first,
                                             extent.second, synchronous);
  }

  if (image_ctx.object_cacher == NULL) {
    AioCompletion *aio_comp = this->m_aio_comp;
    aio_comp->associate_journal_event(tid);
  }
  return tid;
}
834
835 template <typename I>
836 void ImageWriteSameRequest<I>::send_image_cache_request() {
837   I &image_ctx = this->m_image_ctx;
838   assert(image_ctx.image_cache != nullptr);
839
840   AioCompletion *aio_comp = this->m_aio_comp;
841   aio_comp->set_request_count(this->m_image_extents.size());
842   for (auto &extent : this->m_image_extents) {
843     C_AioRequest *req_comp = new C_AioRequest(aio_comp);
844     image_ctx.image_cache->aio_writesame(extent.first, extent.second,
845                                          std::move(m_data_bl), m_op_flags,
846                                          req_comp);
847   }
848 }
849
850 template <typename I>
851 void ImageWriteSameRequest<I>::send_object_cache_requests(
852     const ObjectExtents &object_extents, uint64_t journal_tid) {
853   I &image_ctx = this->m_image_ctx;
854   for (auto p = object_extents.begin(); p != object_extents.end(); ++p) {
855     const ObjectExtent &object_extent = *p;
856
857     bufferlist bl;
858     assemble_writesame_extent(object_extent, &bl, true);
859
860     AioCompletion *aio_comp = this->m_aio_comp;
861     C_AioRequest *req_comp = new C_AioRequest(aio_comp);
862     image_ctx.write_to_cache(
863       object_extent.oid, bl, object_extent.length, object_extent.offset,
864       req_comp, m_op_flags, journal_tid,
865       (this->m_trace.valid() ? &this->m_trace : nullptr));
866   }
867 }
868
869 template <typename I>
870 void ImageWriteSameRequest<I>::send_object_requests(
871     const ObjectExtents &object_extents, const ::SnapContext &snapc,
872     ObjectRequests *object_requests) {
873   I &image_ctx = this->m_image_ctx;
874
875   // cache handles creating object requests during writeback
876   if (image_ctx.object_cacher == NULL) {
877     AbstractImageWriteRequest<I>::send_object_requests(object_extents, snapc,
878                                                        object_requests);
879   }
880 }
881
882 template <typename I>
883 ObjectRequestHandle *ImageWriteSameRequest<I>::create_object_request(
884     const ObjectExtent &object_extent, const ::SnapContext &snapc,
885     Context *on_finish) {
886   I &image_ctx = this->m_image_ctx;
887   assert(image_ctx.object_cacher == NULL);
888
889   bufferlist bl;
890   ObjectRequest<I> *req;
891
892   if (assemble_writesame_extent(object_extent, &bl, false)) {
893     req = ObjectRequest<I>::create_writesame(
894       &image_ctx, object_extent.oid.name, object_extent.objectno,
895       object_extent.offset, object_extent.length,
896       bl, snapc, m_op_flags, this->m_trace, on_finish);
897     return req;
898   }
899   req = ObjectRequest<I>::create_write(
900     &image_ctx, object_extent.oid.name, object_extent.objectno,
901     object_extent.offset, bl, snapc, m_op_flags, this->m_trace, on_finish);
902   return req;
903 }
904
905 template <typename I>
906 void ImageWriteSameRequest<I>::update_stats(size_t length) {
907   I &image_ctx = this->m_image_ctx;
908   image_ctx.perfcounter->inc(l_librbd_ws);
909   image_ctx.perfcounter->inc(l_librbd_ws_bytes, length);
910 }
911
912 template <typename I>
913 uint64_t ImageCompareAndWriteRequest<I>::append_journal_event(
914     const ObjectRequests &requests, bool synchronous) {
915
916   I &image_ctx = this->m_image_ctx;
917
918   uint64_t tid = 0;
919   assert(this->m_image_extents.size() == 1);
920   auto &extent = this->m_image_extents.front();
921   journal::EventEntry event_entry(journal::AioCompareAndWriteEvent(extent.first,
922                                                                    extent.second,
923                                                                    m_cmp_bl, m_bl));
924   tid = image_ctx.journal->append_io_event(std::move(event_entry),
925                                            requests, extent.first,
926                                            extent.second, synchronous);
927
928   AioCompletion *aio_comp = this->m_aio_comp;
929   aio_comp->associate_journal_event(tid);
930
931   return tid;
932 }
933
934 template <typename I>
935 void ImageCompareAndWriteRequest<I>::send_object_cache_requests(
936   const ObjectExtents &object_extents, uint64_t journal_tid) {
937   I &image_ctx = this->m_image_ctx;
938
939   if (image_ctx.object_cacher != NULL) {
940     Mutex::Locker cache_locker(image_ctx.cache_lock);
941     image_ctx.object_cacher->discard_set(image_ctx.object_set,
942                                          object_extents);
943   }
944 }
945
946 template <typename I>
947 void ImageCompareAndWriteRequest<I>::assemble_extent(
948   const ObjectExtent &object_extent, bufferlist *bl) {
949   for (auto q = object_extent.buffer_extents.begin();
950        q != object_extent.buffer_extents.end(); ++q) {
951     bufferlist sub_bl;
952     sub_bl.substr_of(m_bl, q->first, q->second);
953     bl->claim_append(sub_bl);
954   }
955 }
956
957 template <typename I>
958 void ImageCompareAndWriteRequest<I>::send_image_cache_request() {
959   I &image_ctx = this->m_image_ctx;
960   assert(image_ctx.image_cache != nullptr);
961
962   AioCompletion *aio_comp = this->m_aio_comp;
963   aio_comp->set_request_count(1);
964   C_AioRequest *req_comp = new C_AioRequest(aio_comp);
965   image_ctx.image_cache->aio_compare_and_write(
966     std::move(this->m_image_extents), std::move(m_cmp_bl), std::move(m_bl),
967     m_mismatch_offset, m_op_flags, req_comp);
968 }
969
970 template <typename I>
971 ObjectRequestHandle *ImageCompareAndWriteRequest<I>::create_object_request(
972     const ObjectExtent &object_extent,
973     const ::SnapContext &snapc,
974     Context *on_finish) {
975   I &image_ctx = this->m_image_ctx;
976
977   bufferlist bl;
978   assemble_extent(object_extent, &bl);
979   ObjectRequest<I> *req = ObjectRequest<I>::create_compare_and_write(
980                                   &image_ctx, object_extent.oid.name,
981                                   object_extent.objectno, object_extent.offset,
982                                   m_cmp_bl, bl, snapc, m_mismatch_offset,
983                                   m_op_flags, this->m_trace, on_finish);
984   return req;
985 }
986
987 template <typename I>
988 void ImageCompareAndWriteRequest<I>::update_stats(size_t length) {
989   I &image_ctx = this->m_image_ctx;
990   image_ctx.perfcounter->inc(l_librbd_cmp);
991   image_ctx.perfcounter->inc(l_librbd_cmp_bytes, length);
992 }
993
994 template <typename I>
995 int ImageCompareAndWriteRequest<I>::prune_object_extents(ObjectExtents &object_extents) {
996   if (object_extents.size() > 1)
997     return -EINVAL;
998
999   I &image_ctx = this->m_image_ctx;
1000   uint64_t sector_size = 512ULL;
1001   uint64_t su = image_ctx.layout.stripe_unit;
1002   ObjectExtent object_extent = object_extents.front();
1003   if (object_extent.offset % sector_size + object_extent.length > sector_size ||
1004       (su != 0 && (object_extent.offset % su + object_extent.length > su)))
1005     return -EINVAL;
1006
1007   return 0;
1008 }
1009
1010 } // namespace io
1011 } // namespace librbd
1012
// Explicit instantiations for the concrete ImageCtx type so the template
// definitions in this translation unit are emitted exactly once.
template class librbd::io::ImageRequest<librbd::ImageCtx>;
template class librbd::io::ImageReadRequest<librbd::ImageCtx>;
template class librbd::io::AbstractImageWriteRequest<librbd::ImageCtx>;
template class librbd::io::ImageWriteRequest<librbd::ImageCtx>;
template class librbd::io::ImageDiscardRequest<librbd::ImageCtx>;
template class librbd::io::ImageFlushRequest<librbd::ImageCtx>;
template class librbd::io::ImageWriteSameRequest<librbd::ImageCtx>;
template class librbd::io::ImageCompareAndWriteRequest<librbd::ImageCtx>;