1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 #ifndef CEPH_MOSDSUBOP_H
17 #define CEPH_MOSDSUBOP_H
19 #include "MOSDFastDispatchOp.h"
21 #include "include/ceph_features.h"
24 * OSD sub op - for internal ops on pobjects between primary and replicas(/stripes/whatever)
27 class MOSDSubOp : public MOSDFastDispatchOp {
// Wire-format versions. encode_payload() stamps HEAD_VERSION into
// header.version; decode_payload() asserts header.version >= 7, the same
// value as COMPAT_VERSION.
29 static const int HEAD_VERSION = 12;
30 static const int COMPAT_VERSION = 7;
// First field encoded into / decoded from the payload.
33 epoch_t map_epoch = 0;
35 // metadata from original request
42 object_locator_t oloc;
50 bool old_exists = false;
51 uint64_t old_size = 0;
52 eversion_t old_version;
56 // transaction to exec
63 // piggybacked osd/pg state
64 eversion_t pg_trim_to; // primary->replica: trim to here
// Decoded from the wire when header.version >= 11; decode_payload() also
// contains an assignment from pg_trim_to (presumably the fallback for older
// senders -- the branch structure is not fully visible here).
65 eversion_t pg_roll_forward_to; // primary->replica: trim rollback
// Zero-filled with memset() by the multi-argument constructor.
67 osd_peer_stat_t peer_stat;
69 map<string,bufferlist> attrset;
71 interval_set<uint64_t> data_subset;
72 map<hobject_t, interval_set<uint64_t>> clone_subsets;
74 bool first = false, complete = false;
76 interval_set<uint64_t> data_included;
// Decoded via recovery_info.decode(p, pgid.pool()) and encoded with the
// peer's feature bits.
77 ObjectRecoveryInfo recovery_info;
79 // reflects result of current push
80 ObjectRecoveryProgress recovery_progress;
82 // reflects progress before current push
83 ObjectRecoveryProgress current_progress;
85 map<string,bufferlist> omap_entries;
86 bufferlist omap_header;
// The two temp-object ids are only decoded when header.version >= 8.
89 hobject_t new_temp_oid; ///< new temp object that we must now start tracking
90 hobject_t discard_temp_oid; ///< previously used temp object that we can now stop tracking
92 /// non-empty if this transaction involves a hit_set history update
/// (only decoded when header.version >= 10)
93 boost::optional<pg_hit_set_history_t> updated_hit_set_history;
95 epoch_t get_map_epoch() const override {
98 spg_t get_spg() const override {
// Cost reported for this message: when it carries exactly one op and that op
// is CEPH_OSD_OP_PULL, the cost is that op's extent length; in every other
// case it is the length of the message's data.
102 int get_cost() const override {
103 if (ops.size() == 1 && ops[0].op.op == CEPH_OSD_OP_PULL)
104 return ops[0].op.extent.length;
105 return data.length();
// Decodes the message payload into the member fields, in the same field order
// that encode_payload() writes them. Fields added in later wire versions are
// gated on header.version.
108 void decode_payload() override {
109 // since the incorrect_pools flag was dropped, only header.version >= 7 is supported
111 assert (header.version >= 7);
112 bufferlist::iterator p = payload.begin();
113 ::decode(map_epoch, p);
115 ::decode(pgid.pgid, p);
119 ::decode(num_ops, p);
// Each op header comes from the payload; its input data is a slice of the
// message's data, taken at a running offset that advances by op.payload_len.
122 for (unsigned i = 0; i < num_ops; i++) {
123 ::decode(ops[i].op, p);
124 ops[i].indata.substr_of(data, off, ops[i].op.payload_len);
125 off += ops[i].op.payload_len;
128 // the noop flag is no longer needed; it is still decoded to stay aligned with
// the placeholder that encode_payload() writes at this position
130 ::decode(noop_dont_need, p);
132 ::decode(acks_wanted, p);
133 ::decode(version, p);
134 ::decode(old_exists, p);
135 ::decode(old_size, p);
136 ::decode(old_version, p);
137 ::decode(snapset, p);
// Messages with header.version <= 11 carry a SnapContext here; it is decoded
// into a local and not stored.
139 if (header.version <= 11) {
140 SnapContext snapc_dont_need;
141 ::decode(snapc_dont_need, p);
145 ::decode(pg_stats, p);
146 ::decode(pg_trim_to, p);
147 ::decode(peer_stat, p);
148 ::decode(attrset, p);
150 ::decode(data_subset, p);
151 ::decode(clone_subsets, p);
154 ::decode(complete, p);
156 ::decode(data_included, p);
// recovery_info's decoder additionally takes the pool of the pgid decoded above.
157 recovery_info.decode(p, pgid.pool());
158 ::decode(recovery_progress, p);
159 ::decode(current_progress, p);
160 ::decode(omap_entries, p);
161 ::decode(omap_header, p);
// Temp-object ids are present from wire version 8.
163 if (header.version >= 8) {
164 ::decode(new_temp_oid, p);
165 ::decode(discard_temp_oid, p);
// The shard id is present from wire version 9; the NO_SHARD assignment below
// is presumably the fallback for older messages (branch not fully visible).
168 if (header.version >= 9) {
170 ::decode(pgid.shard, p);
174 shard_id_t::NO_SHARD);
175 pgid.shard = shard_id_t::NO_SHARD;
// Hit-set history is present from wire version 10.
177 if (header.version >= 10) {
178 ::decode(updated_hit_set_history, p);
// pg_roll_forward_to is present from wire version 11; the assignment from
// pg_trim_to is presumably the default for older messages (branch not fully
// visible).
180 if (header.version >= 11) {
181 ::decode(pg_roll_forward_to, p);
183 pg_roll_forward_to = pg_trim_to;
// Intentionally empty: this message performs no work in finish_decode().
187 void finish_decode() { }
// Encodes the member fields into the payload, in the field order that
// decode_payload() reads them. `features` is the peer's feature mask: it
// decides whether a legacy SnapContext is emitted and is passed through to
// recovery_info's encoder.
189 void encode_payload(uint64_t features) override {
190 header.version = HEAD_VERSION;
191 ::encode(map_epoch, payload);
192 ::encode(reqid, payload);
193 ::encode(pgid.pgid, payload);
194 ::encode(poid, payload);
196 __u32 num_ops = ops.size();
197 ::encode(num_ops, payload);
// For each op: record the size of its input data in the op header, encode
// the header into the payload, and append the input data to the message's data.
198 for (unsigned i = 0; i < ops.size(); i++) {
199 ops[i].op.payload_len = ops[i].indata.length();
200 ::encode(ops[i].op, payload);
201 data.append(ops[i].indata);
203 ::encode(mtime, payload);
204 // encode a false here for backward compatibility (decode_payload() still reads a flag at this position)
205 ::encode(false, payload);
206 ::encode(acks_wanted, payload);
207 ::encode(version, payload);
208 ::encode(old_exists, payload);
209 ::encode(old_size, payload);
210 ::encode(old_version, payload);
211 ::encode(snapset, payload);
// Peers without CEPH_FEATURE_OSDSUBOP_NO_SNAPCONTEXT get a default-constructed
// SnapContext written here.
// NOTE(review): a statement inside this branch is not visible in this view.
213 if ((features & CEPH_FEATURE_OSDSUBOP_NO_SNAPCONTEXT) == 0) {
215 SnapContext dummy_snapc;
216 ::encode(dummy_snapc, payload);
219 ::encode(logbl, payload);
220 ::encode(pg_stats, payload);
221 ::encode(pg_trim_to, payload);
222 ::encode(peer_stat, payload);
223 ::encode(attrset, payload);
224 ::encode(data_subset, payload);
225 ::encode(clone_subsets, payload);
// Data offset in the header is taken from the first op's extent offset.
// NOTE(review): this reads ops[0]; the guard around it is not visible in this
// view -- confirm it is only reached when ops is non-empty.
227 header.data_off = ops[0].op.extent.offset;
230 ::encode(first, payload);
231 ::encode(complete, payload);
232 ::encode(oloc, payload);
233 ::encode(data_included, payload);
234 ::encode(recovery_info, payload, features);
235 ::encode(recovery_progress, payload);
236 ::encode(current_progress, payload);
237 ::encode(omap_entries, payload);
238 ::encode(omap_header, payload);
239 ::encode(new_temp_oid, payload);
240 ::encode(discard_temp_oid, payload);
241 ::encode(from, payload);
242 ::encode(pgid.shard, payload);
243 ::encode(updated_hit_set_history, payload);
244 ::encode(pg_roll_forward_to, payload);
248 : MOSDFastDispatchOp(MSG_OSD_SUBOP, HEAD_VERSION, COMPAT_VERSION) { }
// Constructs a sub-op message of type MSG_OSD_SUBOP using HEAD_VERSION and
// COMPAT_VERSION. Part of the member-initializer list is not visible in this
// view, so how each argument maps onto a member is not documented here.
249 MOSDSubOp(osd_reqid_t r, pg_shard_t from,
250 spg_t p, const hobject_t& po, int aw,
251 epoch_t mape, ceph_tid_t rtid, eversion_t v)
252 : MOSDFastDispatchOp(MSG_OSD_SUBOP, HEAD_VERSION, COMPAT_VERSION),
// These initializers repeat the in-class defaults of the same members.
259 old_exists(false), old_size(0),
261 first(false), complete(false) {
// Start with an all-zero peer_stat.
262 memset(&peer_stat, 0, sizeof(peer_stat));
// Empty destructor body: nothing is released explicitly here.
266 ~MOSDSubOp() override {}
// Returns the fixed type name string for this message, "osd_sub_op" (the same
// prefix print() writes).
269 const char *get_type_name() const override { return "osd_sub_op"; }
270 void print(ostream& out) const override {
271 out << "osd_sub_op(" << reqid
279 out << " v " << version
280 << " snapset=" << snapset;
281 if (!data_subset.empty()) out << " subset " << data_subset;
282 if (updated_hit_set_history)
283 out << ", has_updated_hit_set_history";