Fix some bugs when testing opensds ansible
[stor4nfv.git] / src / ceph / src / librbd / api / DiffIterate.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "librbd/api/DiffIterate.h"
5 #include "librbd/ImageCtx.h"
6 #include "librbd/ImageState.h"
7 #include "librbd/ObjectMap.h"
8 #include "librbd/Utils.h"
9 #include "librbd/internal.h"
10 #include "include/rados/librados.hpp"
11 #include "include/interval_set.h"
12 #include "common/errno.h"
13 #include "common/Throttle.h"
14 #include "librados/snap_set_diff.h"
15 #include <boost/tuple/tuple.hpp>
16 #include <list>
17 #include <map>
18 #include <vector>
19
20 #define dout_subsys ceph_subsys_rbd
21 #undef dout_prefix
22 #define dout_prefix *_dout << "librbd::DiffIterate: "
23
24 namespace librbd {
25 namespace api {
26
27 namespace {
28
// Per-object outcome of comparing object maps across snapshots. The values
// are stored in a BitVector<2>, so each one must fit in two bits.
enum ObjectDiffState {
  // no change detected; no callback is issued for the object
  OBJECT_DIFF_STATE_NONE = 0,
  // object was written; reported to the callback with exists == true
  OBJECT_DIFF_STATE_UPDATED = 1,
  // object went away; reported to the callback with exists == false
  OBJECT_DIFF_STATE_HOLE = 2
};
34
35 struct DiffContext {
36   DiffIterate<>::Callback callback;
37   void *callback_arg;
38   bool whole_object;
39   uint64_t from_snap_id;
40   uint64_t end_snap_id;
41   interval_set<uint64_t> parent_diff;
42   OrderedThrottle throttle;
43
44   template <typename I>
45   DiffContext(I &image_ctx, DiffIterate<>::Callback callback,
46               void *callback_arg, bool _whole_object, uint64_t _from_snap_id,
47               uint64_t _end_snap_id)
48     : callback(callback), callback_arg(callback_arg),
49       whole_object(_whole_object), from_snap_id(_from_snap_id),
50       end_snap_id(_end_snap_id),
51       throttle(image_ctx.concurrent_management_ops, true) {
52   }
53 };
54
55 class C_DiffObject : public Context {
56 public:
57   template <typename I>
58   C_DiffObject(I &image_ctx, librados::IoCtx &head_ctx,
59                DiffContext &diff_context, const std::string &oid,
60                uint64_t offset, const std::vector<ObjectExtent> &object_extents)
61     : m_cct(image_ctx.cct), m_head_ctx(head_ctx),
62       m_diff_context(diff_context), m_oid(oid), m_offset(offset),
63       m_object_extents(object_extents), m_snap_ret(0) {
64   }
65
66   void send() {
67     C_OrderedThrottle *ctx = m_diff_context.throttle.start_op(this);
68     librados::AioCompletion *rados_completion =
69       util::create_rados_callback(ctx);
70
71     librados::ObjectReadOperation op;
72     op.list_snaps(&m_snap_set, &m_snap_ret);
73
74     int r = m_head_ctx.aio_operate(m_oid, rados_completion, &op, NULL);
75     assert(r == 0);
76     rados_completion->release();
77   }
78
79 protected:
80   typedef boost::tuple<uint64_t, size_t, bool> Diff;
81   typedef std::list<Diff> Diffs;
82
83   void finish(int r) override {
84     CephContext *cct = m_cct;
85     if (r == 0 && m_snap_ret < 0) {
86       r = m_snap_ret;
87     }
88
89     Diffs diffs;
90     if (r == 0) {
91       ldout(cct, 20) << "object " << m_oid << ": list_snaps complete" << dendl;
92       compute_diffs(&diffs);
93     } else if (r == -ENOENT) {
94       ldout(cct, 20) << "object " << m_oid << ": list_snaps (not found)"
95                      << dendl;
96       r = 0;
97       compute_parent_overlap(&diffs);
98     } else {
99       ldout(cct, 20) << "object " << m_oid << ": list_snaps failed: "
100                      << cpp_strerror(r) << dendl;
101     }
102
103     if (r == 0) {
104       for (Diffs::const_iterator d = diffs.begin(); d != diffs.end(); ++d) {
105         r = m_diff_context.callback(d->get<0>(), d->get<1>(), d->get<2>(),
106                                     m_diff_context.callback_arg);
107         if (r < 0) {
108           break;
109         }
110       }
111     }
112     m_diff_context.throttle.end_op(r);
113   }
114
115 private:
116   CephContext *m_cct;
117   librados::IoCtx &m_head_ctx;
118   DiffContext &m_diff_context;
119   std::string m_oid;
120   uint64_t m_offset;
121   std::vector<ObjectExtent> m_object_extents;
122
123   librados::snap_set_t m_snap_set;
124   int m_snap_ret;
125
126   void compute_diffs(Diffs *diffs) {
127     CephContext *cct = m_cct;
128
129     // calc diff from from_snap_id -> to_snap_id
130     interval_set<uint64_t> diff;
131     uint64_t end_size;
132     bool end_exists;
133     librados::snap_t clone_end_snap_id;
134     calc_snap_set_diff(cct, m_snap_set, m_diff_context.from_snap_id,
135                        m_diff_context.end_snap_id, &diff, &end_size,
136                        &end_exists, &clone_end_snap_id);
137     ldout(cct, 20) << "  diff " << diff << " end_exists=" << end_exists
138                    << dendl;
139     if (diff.empty()) {
140       if (m_diff_context.from_snap_id == 0 && !end_exists) {
141         compute_parent_overlap(diffs);
142       }
143       return;
144     } else if (m_diff_context.whole_object) {
145       // provide the full object extents to the callback
146       for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
147            q != m_object_extents.end(); ++q) {
148         diffs->push_back(boost::make_tuple(m_offset + q->offset, q->length,
149                                            end_exists));
150       }
151       return;
152     }
153
154     for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
155          q != m_object_extents.end(); ++q) {
156       ldout(cct, 20) << "diff_iterate object " << m_oid << " extent "
157                      << q->offset << "~" << q->length << " from "
158                      << q->buffer_extents << dendl;
159       uint64_t opos = q->offset;
160       for (vector<pair<uint64_t,uint64_t> >::iterator r =
161              q->buffer_extents.begin();
162            r != q->buffer_extents.end(); ++r) {
163         interval_set<uint64_t> overlap;  // object extents
164         overlap.insert(opos, r->second);
165         overlap.intersection_of(diff);
166         ldout(cct, 20) << " opos " << opos
167                        << " buf " << r->first << "~" << r->second
168                        << " overlap " << overlap << dendl;
169         for (interval_set<uint64_t>::iterator s = overlap.begin();
170                s != overlap.end(); ++s) {
171           uint64_t su_off = s.get_start() - opos;
172           uint64_t logical_off = m_offset + r->first + su_off;
173           ldout(cct, 20) << "   overlap extent " << s.get_start() << "~"
174                          << s.get_len() << " logical " << logical_off << "~"
175                          << s.get_len() << dendl;
176           diffs->push_back(boost::make_tuple(logical_off, s.get_len(),
177                            end_exists));
178         }
179         opos += r->second;
180       }
181       assert(opos == q->offset + q->length);
182     }
183   }
184
185   void compute_parent_overlap(Diffs *diffs) {
186     if (m_diff_context.from_snap_id == 0 &&
187         !m_diff_context.parent_diff.empty()) {
188       // report parent diff instead
189       for (vector<ObjectExtent>::iterator q = m_object_extents.begin();
190            q != m_object_extents.end(); ++q) {
191         for (vector<pair<uint64_t,uint64_t> >::iterator r =
192                q->buffer_extents.begin();
193              r != q->buffer_extents.end(); ++r) {
194           interval_set<uint64_t> o;
195           o.insert(m_offset + r->first, r->second);
196           o.intersection_of(m_diff_context.parent_diff);
197           ldout(m_cct, 20) << " reporting parent overlap " << o << dendl;
198           for (interval_set<uint64_t>::iterator s = o.begin(); s != o.end();
199                ++s) {
200             diffs->push_back(boost::make_tuple(s.get_start(), s.get_len(),
201                              true));
202           }
203         }
204       }
205     }
206   }
207 };
208
209 int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
210   // it's possible for a discard to create a hole in the parent image -- ignore
211   if (exists) {
212     interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg);
213     diff->insert(off, len);
214   }
215   return 0;
216 }
217
218 } // anonymous namespace
219
220 template <typename I>
221 int DiffIterate<I>::diff_iterate(I *ictx,
222                                  const cls::rbd::SnapshotNamespace& from_snap_namespace,
223                                  const char *fromsnapname,
224                                  uint64_t off, uint64_t len,
225                                  bool include_parent, bool whole_object,
226                                  int (*cb)(uint64_t, size_t, int, void *),
227                                  void *arg)
228 {
229   ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
230                  << " len = " << len << dendl;
231
232   // ensure previous writes are visible to listsnaps
233   {
234     RWLock::RLocker owner_locker(ictx->owner_lock);
235     ictx->flush();
236   }
237
238   int r = ictx->state->refresh_if_required();
239   if (r < 0) {
240     return r;
241   }
242
243   ictx->snap_lock.get_read();
244   r = clip_io(ictx, off, &len);
245   ictx->snap_lock.put_read();
246   if (r < 0) {
247     return r;
248   }
249
250   DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len,
251                       include_parent, whole_object, cb, arg);
252   r = command.execute();
253   return r;
254 }
255
256 template <typename I>
257 int DiffIterate<I>::execute() {
258   CephContext* cct = m_image_ctx.cct;
259
260   librados::IoCtx head_ctx;
261   librados::snap_t from_snap_id = 0;
262   librados::snap_t end_snap_id;
263   uint64_t from_size = 0;
264   uint64_t end_size;
265   {
266     RWLock::RLocker md_locker(m_image_ctx.md_lock);
267     RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
268     head_ctx.dup(m_image_ctx.data_ctx);
269     if (m_from_snap_name) {
270       from_snap_id = m_image_ctx.get_snap_id(m_from_snap_namespace, m_from_snap_name);
271       from_size = m_image_ctx.get_image_size(from_snap_id);
272     }
273     end_snap_id = m_image_ctx.snap_id;
274     end_size = m_image_ctx.get_image_size(end_snap_id);
275   }
276
277   if (from_snap_id == CEPH_NOSNAP) {
278     return -ENOENT;
279   }
280   if (from_snap_id == end_snap_id) {
281     // no diff.
282     return 0;
283   }
284   if (from_snap_id >= end_snap_id) {
285     return -EINVAL;
286   }
287
288   int r;
289   bool fast_diff_enabled = false;
290   BitVector<2> object_diff_state;
291   {
292     RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
293     if (m_whole_object && (m_image_ctx.features & RBD_FEATURE_FAST_DIFF) != 0) {
294       r = diff_object_map(from_snap_id, end_snap_id, &object_diff_state);
295       if (r < 0) {
296         ldout(cct, 5) << "fast diff disabled" << dendl;
297       } else {
298         ldout(cct, 5) << "fast diff enabled" << dendl;
299         fast_diff_enabled = true;
300       }
301     }
302   }
303
304   // we must list snaps via the head, not end snap
305   head_ctx.snap_set_read(CEPH_SNAPDIR);
306
307   ldout(cct, 5) << "diff_iterate from " << from_snap_id << " to "
308                 << end_snap_id << " size from " << from_size
309                 << " to " << end_size << dendl;
310
311   // check parent overlap only if we are comparing to the beginning of time
312   DiffContext diff_context(m_image_ctx, m_callback, m_callback_arg,
313                            m_whole_object, from_snap_id, end_snap_id);
314   if (m_include_parent && from_snap_id == 0) {
315     RWLock::RLocker l(m_image_ctx.snap_lock);
316     RWLock::RLocker l2(m_image_ctx.parent_lock);
317     uint64_t overlap = 0;
318     m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &overlap);
319     r = 0;
320     if (m_image_ctx.parent && overlap > 0) {
321       ldout(cct, 10) << " first getting parent diff" << dendl;
322       DiffIterate diff_parent(*m_image_ctx.parent, {},
323                               nullptr, 0, overlap,
324                               m_include_parent, m_whole_object,
325                               &simple_diff_cb,
326                               &diff_context.parent_diff);
327       r = diff_parent.execute();
328     }
329     if (r < 0) {
330       return r;
331     }
332   }
333
334   uint64_t period = m_image_ctx.get_stripe_period();
335   uint64_t off = m_offset;
336   uint64_t left = m_length;
337
338   while (left > 0) {
339     uint64_t period_off = off - (off % period);
340     uint64_t read_len = min(period_off + period - off, left);
341
342     // map to extents
343     map<object_t,vector<ObjectExtent> > object_extents;
344     Striper::file_to_extents(cct, m_image_ctx.format_string,
345                              &m_image_ctx.layout, off, read_len, 0,
346                              object_extents, 0);
347
348     // get snap info for each object
349     for (map<object_t,vector<ObjectExtent> >::iterator p =
350            object_extents.begin();
351          p != object_extents.end(); ++p) {
352       ldout(cct, 20) << "object " << p->first << dendl;
353
354       if (fast_diff_enabled) {
355         const uint64_t object_no = p->second.front().objectno;
356         if (object_diff_state[object_no] != OBJECT_DIFF_STATE_NONE) {
357           bool updated = (object_diff_state[object_no] ==
358                             OBJECT_DIFF_STATE_UPDATED);
359           for (std::vector<ObjectExtent>::iterator q = p->second.begin();
360                q != p->second.end(); ++q) {
361             r = m_callback(off + q->offset, q->length, updated, m_callback_arg);
362             if (r < 0) {
363               return r;
364             }
365           }
366         }
367       } else {
368         C_DiffObject *diff_object = new C_DiffObject(m_image_ctx, head_ctx,
369                                                      diff_context,
370                                                      p->first.name, off,
371                                                      p->second);
372         diff_object->send();
373
374         if (diff_context.throttle.pending_error()) {
375           r = diff_context.throttle.wait_for_ret();
376           return r;
377         }
378       }
379     }
380
381     left -= read_len;
382     off += read_len;
383   }
384
385   r = diff_context.throttle.wait_for_ret();
386   if (r < 0) {
387     return r;
388   }
389   return 0;
390 }
391
392 template <typename I>
393 int DiffIterate<I>::diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id,
394                                     BitVector<2>* object_diff_state) {
395   assert(m_image_ctx.snap_lock.is_locked());
396   CephContext* cct = m_image_ctx.cct;
397
398   bool diff_from_start = (from_snap_id == 0);
399   if (from_snap_id == 0) {
400     if (!m_image_ctx.snaps.empty()) {
401       from_snap_id = m_image_ctx.snaps.back();
402     } else {
403       from_snap_id = CEPH_NOSNAP;
404     }
405   }
406
407   object_diff_state->clear();
408   uint64_t current_snap_id = from_snap_id;
409   uint64_t next_snap_id = to_snap_id;
410   BitVector<2> prev_object_map;
411   bool prev_object_map_valid = false;
412   while (true) {
413     uint64_t current_size = m_image_ctx.size;
414     if (current_snap_id != CEPH_NOSNAP) {
415       std::map<librados::snap_t, SnapInfo>::const_iterator snap_it =
416         m_image_ctx.snap_info.find(current_snap_id);
417       assert(snap_it != m_image_ctx.snap_info.end());
418       current_size = snap_it->second.size;
419
420       ++snap_it;
421       if (snap_it != m_image_ctx.snap_info.end()) {
422         next_snap_id = snap_it->first;
423       } else {
424         next_snap_id = CEPH_NOSNAP;
425       }
426     }
427
428     uint64_t flags;
429     int r = m_image_ctx.get_flags(from_snap_id, &flags);
430     if (r < 0) {
431       lderr(cct) << "diff_object_map: failed to retrieve image flags" << dendl;
432       return r;
433     }
434     if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) {
435       ldout(cct, 1) << "diff_object_map: cannot perform fast diff on invalid "
436                     << "object map" << dendl;
437       return -EINVAL;
438     }
439
440     BitVector<2> object_map;
441     std::string oid(ObjectMap<>::object_map_name(m_image_ctx.id,
442                                                  current_snap_id));
443     r = cls_client::object_map_load(&m_image_ctx.md_ctx, oid, &object_map);
444     if (r < 0) {
445       lderr(cct) << "diff_object_map: failed to load object map " << oid
446                  << dendl;
447       return r;
448     }
449     ldout(cct, 20) << "diff_object_map: loaded object map " << oid << dendl;
450
451     uint64_t num_objs = Striper::get_num_objects(m_image_ctx.layout,
452                                                  current_size);
453     if (object_map.size() < num_objs) {
454       ldout(cct, 1) << "diff_object_map: object map too small: "
455                     << object_map.size() << " < " << num_objs << dendl;
456       return -EINVAL;
457     }
458     object_map.resize(num_objs);
459
460     uint64_t overlap = MIN(object_map.size(), prev_object_map.size());
461     for (uint64_t i = 0; i < overlap; ++i) {
462       ldout(cct, 20) << __func__ << ": object state: " << i << " "
463                      << static_cast<uint32_t>(prev_object_map[i])
464                      << "->" << static_cast<uint32_t>(object_map[i]) << dendl;
465       if (object_map[i] == OBJECT_NONEXISTENT) {
466         if (prev_object_map[i] != OBJECT_NONEXISTENT) {
467           (*object_diff_state)[i] = OBJECT_DIFF_STATE_HOLE;
468         }
469       } else if (object_map[i] == OBJECT_EXISTS ||
470                  (prev_object_map[i] != object_map[i] &&
471                   !(prev_object_map[i] == OBJECT_EXISTS &&
472                     object_map[i] == OBJECT_EXISTS_CLEAN))) {
473         (*object_diff_state)[i] = OBJECT_DIFF_STATE_UPDATED;
474       }
475     }
476     ldout(cct, 20) << "diff_object_map: computed overlap diffs" << dendl;
477
478     object_diff_state->resize(object_map.size());
479     if (object_map.size() > prev_object_map.size() &&
480         (diff_from_start || prev_object_map_valid)) {
481       for (uint64_t i = overlap; i < object_diff_state->size(); ++i) {
482         ldout(cct, 20) << __func__ << ": object state: " << i << " "
483                        << "->" << static_cast<uint32_t>(object_map[i]) << dendl;
484         if (object_map[i] == OBJECT_NONEXISTENT) {
485           (*object_diff_state)[i] = OBJECT_DIFF_STATE_NONE;
486         } else {
487           (*object_diff_state)[i] = OBJECT_DIFF_STATE_UPDATED;
488         }
489       }
490     }
491     ldout(cct, 20) << "diff_object_map: computed resize diffs" << dendl;
492
493     if (current_snap_id == next_snap_id || next_snap_id > to_snap_id) {
494       break;
495     }
496     current_snap_id = next_snap_id;
497     prev_object_map = object_map;
498     prev_object_map_valid = true;
499   }
500   return 0;
501 }
502
503 } // namespace api
504 } // namespace librbd
505
// Explicit instantiation of DiffIterate for librbd::ImageCtx, so the member
// templates defined in this translation unit are emitted here.
template class librbd::api::DiffIterate<librbd::ImageCtx>;