Fix some bugs when testing opensds ansible
[stor4nfv.git] / src / ceph / src / osd / ECTransaction.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4  * Ceph - scalable distributed file system
5  *
6  * Copyright (C) 2013 Inktank Storage, Inc.
7  *
8  * This is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License version 2.1, as published by the Free Software
11  * Foundation.  See file COPYING.
12  *
13  */
14
15 #ifndef ECTRANSACTION_H
16 #define ECTRANSACTION_H
17
18 #include "OSD.h"
19 #include "PGBackend.h"
20 #include "ECUtil.h"
21 #include "erasure-code/ErasureCodeInterface.h"
22 #include "PGTransaction.h"
23 #include "ExtentCache.h"
24
25 namespace ECTransaction {
26   struct WritePlan {
27     PGTransactionUPtr t;
28     bool invalidates_cache = false; // Yes, both are possible
29     map<hobject_t,extent_set> to_read;
30     map<hobject_t,extent_set> will_write; // superset of to_read
31
32     map<hobject_t,ECUtil::HashInfoRef> hash_infos;
33   };
34
35   bool requires_overwrite(
36     uint64_t prev_size,
37     const PGTransaction::ObjectOperation &op);
38
39   template <typename F>
40   WritePlan get_write_plan(
41     const ECUtil::stripe_info_t &sinfo,
42     PGTransactionUPtr &&t,
43     F &&get_hinfo,
44     DoutPrefixProvider *dpp) {
45     WritePlan plan;
46     t->safe_create_traverse(
47       [&](pair<const hobject_t, PGTransaction::ObjectOperation> &i) {
48         ECUtil::HashInfoRef hinfo = get_hinfo(i.first);
49         plan.hash_infos[i.first] = hinfo;
50
51         uint64_t projected_size =
52           hinfo->get_projected_total_logical_size(sinfo);
53
54         if (i.second.deletes_first()) {
55           ldpp_dout(dpp, 20) << __func__ << ": delete, setting projected size"
56                              << " to 0" << dendl;
57           projected_size = 0;
58         }
59
60         hobject_t source;
61         if (i.second.has_source(&source)) {
62           plan.invalidates_cache = true;
63
64           ECUtil::HashInfoRef shinfo = get_hinfo(source);
65           projected_size = shinfo->get_projected_total_logical_size(sinfo);
66           plan.hash_infos[source] = shinfo;
67         }
68
69         auto &will_write = plan.will_write[i.first];
70         if (i.second.truncate &&
71             i.second.truncate->first < projected_size) {
72           if (!(sinfo.logical_offset_is_stripe_aligned(
73                   i.second.truncate->first))) {
74             plan.to_read[i.first].union_insert(
75               sinfo.logical_to_prev_stripe_offset(i.second.truncate->first),
76               sinfo.get_stripe_width());
77
78             ldpp_dout(dpp, 20) << __func__ << ": unaligned truncate" << dendl;
79
80             will_write.union_insert(
81               sinfo.logical_to_prev_stripe_offset(i.second.truncate->first),
82               sinfo.get_stripe_width());
83           }
84           projected_size = sinfo.logical_to_next_stripe_offset(
85             i.second.truncate->first);
86         }
87
88         extent_set raw_write_set;
89         for (auto &&extent: i.second.buffer_updates) {
90           using BufferUpdate = PGTransaction::ObjectOperation::BufferUpdate;
91           if (boost::get<BufferUpdate::CloneRange>(&(extent.get_val()))) {
92             assert(
93               0 ==
94               "CloneRange is not allowed, do_op should have returned ENOTSUPP");
95           }
96           raw_write_set.insert(extent.get_off(), extent.get_len());
97         }
98
99         auto orig_size = projected_size;
100         for (auto extent = raw_write_set.begin();
101              extent != raw_write_set.end();
102              ++extent) {
103           uint64_t head_start =
104             sinfo.logical_to_prev_stripe_offset(extent.get_start());
105           uint64_t head_finish =
106             sinfo.logical_to_next_stripe_offset(extent.get_start());
107           if (head_start > projected_size) {
108             head_start = projected_size;
109           }
110           if (head_start != head_finish &&
111               head_start < orig_size) {
112             assert(head_finish <= orig_size);
113             assert(head_finish - head_start == sinfo.get_stripe_width());
114             ldpp_dout(dpp, 20) << __func__ << ": reading partial head stripe "
115                                << head_start << "~" << sinfo.get_stripe_width()
116                                << dendl;
117             plan.to_read[i.first].union_insert(
118               head_start, sinfo.get_stripe_width());
119           }
120
121           uint64_t tail_start =
122             sinfo.logical_to_prev_stripe_offset(
123               extent.get_start() + extent.get_len());
124           uint64_t tail_finish =
125             sinfo.logical_to_next_stripe_offset(
126               extent.get_start() + extent.get_len());
127           if (tail_start != tail_finish &&
128               (head_start == head_finish || tail_start != head_start) &&
129               tail_start < orig_size) {
130             assert(tail_finish <= orig_size);
131             assert(tail_finish - tail_start == sinfo.get_stripe_width());
132             ldpp_dout(dpp, 20) << __func__ << ": reading partial tail stripe "
133                                << tail_start << "~" << sinfo.get_stripe_width()
134                                << dendl;
135             plan.to_read[i.first].union_insert(
136               tail_start, sinfo.get_stripe_width());
137           }
138
139           if (head_start != tail_finish) {
140             assert(
141               sinfo.logical_offset_is_stripe_aligned(
142                 tail_finish - head_start)
143               );
144             will_write.union_insert(
145               head_start, tail_finish - head_start);
146             if (tail_finish > projected_size)
147               projected_size = tail_finish;
148           } else {
149             assert(tail_finish <= projected_size);
150           }
151         }
152
153         if (i.second.truncate &&
154             i.second.truncate->second > projected_size) {
155           uint64_t truncating_to =
156             sinfo.logical_to_next_stripe_offset(i.second.truncate->second);
157           ldpp_dout(dpp, 20) << __func__ << ": truncating out to "
158                              <<  truncating_to
159                              << dendl;
160           will_write.union_insert(projected_size,
161                                   truncating_to - projected_size);
162           projected_size = truncating_to;
163         }
164
165         ldpp_dout(dpp, 20) << __func__ << ": " << i.first
166                            << " projected size "
167                            << projected_size
168                            << dendl;
169         hinfo->set_projected_total_logical_size(
170           sinfo,
171           projected_size);
172
173         /* validate post conditions:
174          * to_read should have an entry for i.first iff it isn't empty
175          * and if we are reading from i.first, we can't be renaming or
176          * cloning it */
177         assert(plan.to_read.count(i.first) == 0 ||
178                (!plan.to_read.at(i.first).empty() &&
179                 !i.second.has_source()));
180       });
181     plan.t = std::move(t);
182     return plan;
183   }
184
185   void generate_transactions(
186     WritePlan &plan,
187     ErasureCodeInterfaceRef &ecimpl,
188     pg_t pgid,
189     bool legacy_log_entries,
190     const ECUtil::stripe_info_t &sinfo,
191     const map<hobject_t,extent_map> &partial_extents,
192     vector<pg_log_entry_t> &entries,
193     map<hobject_t,extent_map> *written,
194     map<shard_id_t, ObjectStore::Transaction> *transactions,
195     set<hobject_t> *temp_added,
196     set<hobject_t> *temp_removed,
197     DoutPrefixProvider *dpp);
198 };
199
200 #endif