1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2013 Inktank Storage, Inc.
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #ifndef ECTRANSACTION_H
16 #define ECTRANSACTION_H
19 #include "PGBackend.h"
21 #include "erasure-code/ErasureCodeInterface.h"
22 #include "PGTransaction.h"
23 #include "ExtentCache.h"
25 namespace ECTransaction {
// Data members of the plan that get_write_plan() fills in (accessed there as
// plan.<member>). NOTE(review): the enclosing type header and at least one
// further member (plan.t is assigned in get_write_plan) are not visible in
// this excerpt.
//
// invalidates_cache: set to true by get_write_plan() when an operation has a
// source object.
28 bool invalidates_cache = false; // Yes, both are possible
// to_read: per object, the extents that must be read before the write can be
// applied; get_write_plan() inserts whole stripes here for unaligned
// truncates and for partially written head and tail stripes.
29 map<hobject_t,extent_set> to_read;
// will_write: per object, the extents the transaction is going to write.
30 map<hobject_t,extent_set> will_write; // superset of to_read
// hash_infos: hash info handle for every object the plan looked up, including
// source objects of operations that have one.
32 map<hobject_t,ECUtil::HashInfoRef> hash_infos;
// Declaration only; the definition is not part of this excerpt.
// Takes an object operation by const reference and returns a bool.
// NOTE(review): presumably reports whether op would overwrite existing data;
// at least one parameter line is missing from this view, so confirm the full
// signature and meaning against the definition.
35 bool requires_overwrite(
37 const PGTransaction::ObjectOperation &op);
// Builds a WritePlan for transaction t. Every (object, operation) pair is
// visited through safe_create_traverse; for each object the function records
// which stripes have to be read back (plan.to_read), which ranges will be
// written (plan.will_write), and the hash info used (plan.hash_infos), while
// tracking the projected logical size of the object. The transaction itself
// is moved into plan.t at the end.
// NOTE(review): several lines of this definition are not visible in this
// excerpt (the declaration of plan, of source and of get_hinfo, closing
// braces, the tails of some log statements, and the return). get_hinfo is
// presumably a callable parameter mapping an hobject_t to its HashInfoRef;
// confirm against the full file.
40 WritePlan get_write_plan(
// sinfo: stripe geometry; all offset rounding and alignment checks go
// through it.
41 const ECUtil::stripe_info_t &sinfo,
// t: the transaction being planned; ownership ends up in plan.t.
42 PGTransactionUPtr &&t,
// dpp: prefix provider handed to ldpp_dout for the level 20 debug output.
44 DoutPrefixProvider *dpp) {
// Visit each object operation in the transaction.
46 t->safe_create_traverse(
47 [&](pair<const hobject_t, PGTransaction::ObjectOperation> &i) {
// Fetch the hash info for this object and remember it in the plan.
48 ECUtil::HashInfoRef hinfo = get_hinfo(i.first);
49 plan.hash_infos[i.first] = hinfo;
// Start from the size currently projected for this object.
51 uint64_t projected_size =
52 hinfo->get_projected_total_logical_size(sinfo);
// Operation deletes the object first. The log text says the projected size
// is set here; the assignment itself is on a line not shown.
54 if (i.second.deletes_first()) {
55 ldpp_dout(dpp, 20) << __func__ << ": delete, setting projected size"
// Operation has a source object: mark the plan as cache-invalidating, and
// take the projected size and hash info from the source object.
61 if (i.second.has_source(&source)) {
62 plan.invalidates_cache = true;
64 ECUtil::HashInfoRef shinfo = get_hinfo(source);
65 projected_size = shinfo->get_projected_total_logical_size(sinfo);
66 plan.hash_infos[source] = shinfo;
// Creates the will_write entry for this object if it does not exist yet.
69 auto &will_write = plan.will_write[i.first];
// Truncate to an offset below the projected size.
70 if (i.second.truncate &&
71 i.second.truncate->first < projected_size) {
// Truncate offset is not stripe aligned: the stripe starting at the previous
// stripe offset is added to both to_read and will_write.
72 if (!(sinfo.logical_offset_is_stripe_aligned(
73 i.second.truncate->first))) {
74 plan.to_read[i.first].union_insert(
75 sinfo.logical_to_prev_stripe_offset(i.second.truncate->first),
76 sinfo.get_stripe_width());
78 ldpp_dout(dpp, 20) << __func__ << ": unaligned truncate" << dendl;
80 will_write.union_insert(
81 sinfo.logical_to_prev_stripe_offset(i.second.truncate->first),
82 sinfo.get_stripe_width());
// Projected size becomes the next stripe offset for the truncate point.
84 projected_size = sinfo.logical_to_next_stripe_offset(
85 i.second.truncate->first);
// Collect the offset and length of every buffer update into one extent set.
// CloneRange updates are not expected at this point (see the message text
// below; the surrounding assert line is not visible here).
88 extent_set raw_write_set;
89 for (auto &&extent: i.second.buffer_updates) {
90 using BufferUpdate = PGTransaction::ObjectOperation::BufferUpdate;
91 if (boost::get<BufferUpdate::CloneRange>(&(extent.get_val()))) {
94 "CloneRange is not allowed, do_op should have returned ENOTSUPP");
96 raw_write_set.insert(extent.get_off(), extent.get_len());
// Snapshot of the size before the writes below grow projected_size; the
// read decisions for head and tail stripes compare against this value.
99 auto orig_size = projected_size;
// Walk the collected write extents.
100 for (auto extent = raw_write_set.begin();
101 extent != raw_write_set.end();
// Stripe offsets before and after the start of the extent.
103 uint64_t head_start =
104 sinfo.logical_to_prev_stripe_offset(extent.get_start());
105 uint64_t head_finish =
106 sinfo.logical_to_next_stripe_offset(extent.get_start());
// Extent starts past the projected end: pull head_start back to the
// projected size so the range written below also covers the gap.
107 if (head_start > projected_size) {
108 head_start = projected_size;
// Head stripe is only partially covered (presumably head_start equals
// head_finish when the start is aligned; confirm) and lies inside the
// original size, so it has to be read first.
110 if (head_start != head_finish &&
111 head_start < orig_size) {
112 assert(head_finish <= orig_size);
113 assert(head_finish - head_start == sinfo.get_stripe_width());
114 ldpp_dout(dpp, 20) << __func__ << ": reading partial head stripe "
115 << head_start << "~" << sinfo.get_stripe_width()
117 plan.to_read[i.first].union_insert(
118 head_start, sinfo.get_stripe_width());
// Stripe offsets before and after the end of the extent.
121 uint64_t tail_start =
122 sinfo.logical_to_prev_stripe_offset(
123 extent.get_start() + extent.get_len());
124 uint64_t tail_finish =
125 sinfo.logical_to_next_stripe_offset(
126 extent.get_start() + extent.get_len());
// Tail stripe is read under the same conditions as the head stripe, except
// when it is the very stripe already handled as a partial head.
127 if (tail_start != tail_finish &&
128 (head_start == head_finish || tail_start != head_start) &&
129 tail_start < orig_size) {
130 assert(tail_finish <= orig_size);
131 assert(tail_finish - tail_start == sinfo.get_stripe_width());
132 ldpp_dout(dpp, 20) << __func__ << ": reading partial tail stripe "
133 << tail_start << "~" << sinfo.get_stripe_width()
135 plan.to_read[i.first].union_insert(
136 tail_start, sinfo.get_stripe_width());
// Non-empty range: record head_start up to tail_finish as written and grow
// the projected size if the range extends past it. The alignment check on
// the range length is part of a statement whose opening line is not shown.
139 if (head_start != tail_finish) {
141 sinfo.logical_offset_is_stripe_aligned(
142 tail_finish - head_start)
144 will_write.union_insert(
145 head_start, tail_finish - head_start);
146 if (tail_finish > projected_size)
147 projected_size = tail_finish;
// NOTE(review): presumably the else branch for an empty range; the branch
// keyword is on a line not visible here.
149 assert(tail_finish <= projected_size);
// Second truncate value lies beyond the projected size: extend the object by
// writing from the projected size up to the next stripe offset.
153 if (i.second.truncate &&
154 i.second.truncate->second > projected_size) {
155 uint64_t truncating_to =
156 sinfo.logical_to_next_stripe_offset(i.second.truncate->second);
157 ldpp_dout(dpp, 20) << __func__ << ": truncating out to "
160 will_write.union_insert(projected_size,
161 truncating_to - projected_size);
162 projected_size = truncating_to;
// Log the result and store the new projected size back into the hash info.
165 ldpp_dout(dpp, 20) << __func__ << ": " << i.first
166 << " projected size "
169 hinfo->set_projected_total_logical_size(
173 /* validate post conditions:
174 * to_read should have an entry for i.first iff it isn't empty
175 * and if we are reading from i.first, we can't be renaming or
177 assert(plan.to_read.count(i.first) == 0 ||
178 (!plan.to_read.at(i.first).empty() &&
179 !i.second.has_source()));
// Hand the transaction over to the plan.
181 plan.t = std::move(t);
// Declaration only; the definition is not part of this excerpt, and some
// parameter lines are missing from this view.
// Visible parameters:
//   ecimpl             - erasure code implementation handle
//   legacy_log_entries - flag; its effect is not visible here
//   sinfo              - stripe geometry
//   partial_extents    - per object extent_map, passed read-only
//   entries            - pg log entries, passed by non-const reference
//   written, transactions, temp_added, temp_removed
//                      - pointer parameters; presumably outputs filled in by
//                        the function (TODO confirm against the definition)
//   dpp                - prefix provider for debug output
185 void generate_transactions(
187 ErasureCodeInterfaceRef &ecimpl,
189 bool legacy_log_entries,
190 const ECUtil::stripe_info_t &sinfo,
191 const map<hobject_t,extent_map> &partial_extents,
192 vector<pg_log_entry_t> &entries,
193 map<hobject_t,extent_map> *written,
194 map<shard_id_t, ObjectStore::Transaction> *transactions,
195 set<hobject_t> *temp_added,
196 set<hobject_t> *temp_removed,
197 DoutPrefixProvider *dpp);