// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#ifndef CEPH_OSD_INTERNAL_TYPES_H
#define CEPH_OSD_INTERNAL_TYPES_H

#include "osd_types.h"
#include "OpRequest.h"
/*
 * keep tabs on object modifications that are in flight.
 * we need to know the projected existence, size, snapset,
 * etc., because we don't send writes down to disk until after
 * replicas ack.
 */
struct SnapSetContext {
  hobject_t oid;
  SnapSet snapset;
  int ref;
  bool registered : 1;
  bool exists : 1;

  explicit SnapSetContext(const hobject_t& o) :
    oid(o), ref(0), registered(false), exists(true) { }
};

struct ObjectState {
  object_info_t oi;
  bool exists;         ///< the stored object exists (i.e., we will remember the object_info_t)

  ObjectState() : exists(false) {}

  ObjectState(const object_info_t &oi_, bool exists_)
    : oi(oi_), exists(exists_) {}
};
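// Illustrative sketch (not part of this header): an ObjectState carries the
// *projected* object_info_t, i.e. what the object will look like once the
// in-flight writes commit, and whether the object will exist at all:
//
//   object_info_t oi = ...;              // filled in by the caller
//   ObjectState projected(oi, true);     // object will exist after the op
//   if (!projected.exists)
//     ;                                  // treat as nonexistent for later ops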
struct ObjectContext;
typedef ceph::shared_ptr<ObjectContext> ObjectContextRef;
struct ObjectContext {
  ObjectState obs;

  SnapSetContext *ssc;  // may be null

  Context *destructor_callback;

  Mutex lock;
  Cond cond;
  int unstable_writes, readers, writers_waiting, readers_waiting;

  // any entity in obs.oi.watchers MUST be in either watchers or unconnected_watchers.
  map<pair<uint64_t, entity_name_t>, WatchRef> watchers;

  // attr cache
  map<string, bufferlist> attr_cache;
  struct RWState {
    enum State {
      RWNONE,
      RWREAD,
      RWWRITE,
      RWEXCL,
    };
    static const char *get_state_name(State s) {
      switch (s) {
      case RWNONE: return "none";
      case RWREAD: return "read";
      case RWWRITE: return "write";
      case RWEXCL: return "excl";
      default: return "???";
      }
    }
    const char *get_state_name() const {
      return get_state_name(state);
    }
    list<OpRequestRef> waiters;  ///< ops waiting on state change
    int count;                   ///< number of readers or writers

    State state:4;               ///< rw state
    /// if set, restart backfill when we can get a read lock
    bool recovery_read_marker:1;
    /// if set, requeue snaptrim on lock release
    bool snaptrimmer_write_marker:1;

    RWState()
      : count(0),
        state(RWNONE),
        recovery_read_marker(false),
        snaptrimmer_write_marker(false)
    {}
    bool get_read(OpRequestRef op) {
      if (get_read_lock()) {
        return true;
      } // else
      waiters.push_back(op);
      return false;
    }
    /// this function adjusts the counts if necessary
    bool get_read_lock() {
      // don't starve anybody!
      if (!waiters.empty()) {
        return false;
      }
      switch (state) {
      case RWNONE:
        assert(count == 0);
        state = RWREAD;
        // fall through
      case RWREAD:
        count++;
        return true;
      case RWWRITE:
        return false;
      case RWEXCL:
        return false;
      default:
        assert(0 == "unhandled case");
        return false;
      }
    }
    bool get_write(OpRequestRef op, bool greedy=false) {
      if (get_write_lock(greedy)) {
        return true;
      } // else
      if (op)
        waiters.push_back(op);
      return false;
    }
    bool get_write_lock(bool greedy=false) {
      if (!greedy) {
        // don't starve anybody!
        if (!waiters.empty() ||
            recovery_read_marker) {
          return false;
        }
      }
      switch (state) {
      case RWNONE:
        assert(count == 0);
        state = RWWRITE;
        // fall through
      case RWWRITE:
        count++;
        return true;
      case RWREAD:
        return false;
      case RWEXCL:
        return false;
      default:
        assert(0 == "unhandled case");
        return false;
      }
    }
    bool get_excl_lock() {
      switch (state) {
      case RWNONE:
        assert(count == 0);
        state = RWEXCL;
        count = 1;
        return true;
      case RWWRITE:
        return false;
      case RWREAD:
        return false;
      case RWEXCL:
        return false;
      default:
        assert(0 == "unhandled case");
        return false;
      }
    }
    bool get_excl(OpRequestRef op) {
      if (get_excl_lock()) {
        return true;
      } // else
      if (op)
        waiters.push_back(op);
      return false;
    }
    /// same as get_write_lock, but ignore starvation
    bool take_write_lock() {
      if (state == RWWRITE) {
        count++;
        return true;
      }
      return get_write_lock();
    }
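    // Note (illustrative, not from this header): take_write_lock() skips the
    // starvation check in get_write_lock(), so it never queues a waiter; it is
    // meant for callers that must not be deferred behind queued ops:
    //
    //   if (rwstate.take_write_lock()) { /* proceed immediately */ }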
    void dec(list<OpRequestRef> *requeue) {
      assert(count > 0);
      assert(requeue);
      count--;
      if (count == 0) {
        state = RWNONE;
        requeue->splice(requeue->end(), waiters);
      }
    }
    void put_read(list<OpRequestRef> *requeue) {
      assert(state == RWREAD);
      dec(requeue);
    }
    void put_write(list<OpRequestRef> *requeue) {
      assert(state == RWWRITE);
      dec(requeue);
    }
    void put_excl(list<OpRequestRef> *requeue) {
      assert(state == RWEXCL);
      dec(requeue);
    }
    bool empty() const { return state == RWNONE; }
  } rwstate;
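  // A minimal usage sketch of the RWState protocol (illustrative, not part of
  // this header).  A successful get_* must be paired with the matching put_*,
  // which hands back any waiters the caller should requeue:
  //
  //   if (rwstate.get_read(op)) {
  //     ... do the read ...
  //     list<OpRequestRef> requeue;
  //     rwstate.put_read(&requeue);
  //     // requeue now holds ops that were blocked behind this lock
  //   } // else: op was queued on rwstate.waiters and is retried on release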
  bool get_read(OpRequestRef op) {
    return rwstate.get_read(op);
  }
  bool get_write(OpRequestRef op) {
    return rwstate.get_write(op, false);
  }
  bool get_excl(OpRequestRef op) {
    return rwstate.get_excl(op);
  }
  bool get_lock_type(OpRequestRef op, RWState::State type) {
    switch (type) {
    case RWState::RWWRITE:
      return get_write(op);
    case RWState::RWREAD:
      return get_read(op);
    case RWState::RWEXCL:
      return get_excl(op);
    default:
      assert(0 == "invalid lock type");
      return true;
    }
  }
  bool get_write_greedy(OpRequestRef op) {
    return rwstate.get_write(op, true);
  }
  bool get_snaptrimmer_write(bool mark_if_unsuccessful) {
    if (rwstate.get_write_lock()) {
      return true;
    } else {
      if (mark_if_unsuccessful)
        rwstate.snaptrimmer_write_marker = true;
      return false;
    }
  }
  bool get_recovery_read() {
    rwstate.recovery_read_marker = true;
    if (rwstate.get_read_lock()) {
      return true;
    }
    return false;
  }
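  // Illustrative sketch of the snaptrimmer marker (not part of this header):
  // a failed attempt with mark_if_unsuccessful=true leaves a hint, so that a
  // later put_lock_type() reports *requeue_snaptrimmer=true to the caller:
  //
  //   if (!obc->get_snaptrimmer_write(true))
  //     return;  // trimming resumes when the current holder drops the lock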
  bool try_get_read_lock() {
    return rwstate.get_read_lock();
  }
  void drop_recovery_read(list<OpRequestRef> *ls) {
    assert(rwstate.recovery_read_marker);
    rwstate.put_read(ls);
    rwstate.recovery_read_marker = false;
  }
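  // Illustrative recovery-read lifecycle (not part of this header):
  //
  //   if (obc->get_recovery_read()) {
  //     ... recover the object ...
  //     list<OpRequestRef> requeue;
  //     obc->drop_recovery_read(&requeue);   // also clears the marker
  //   } else {
  //     // marker stays set; put_lock_type() will report requeue_recovery
  //   }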
  void put_lock_type(
    ObjectContext::RWState::State type,
    list<OpRequestRef> *to_wake,
    bool *requeue_recovery,
    bool *requeue_snaptrimmer) {
    switch (type) {
    case ObjectContext::RWState::RWWRITE:
      rwstate.put_write(to_wake);
      break;
    case ObjectContext::RWState::RWREAD:
      rwstate.put_read(to_wake);
      break;
    case ObjectContext::RWState::RWEXCL:
      rwstate.put_excl(to_wake);
      break;
    default:
      assert(0 == "invalid lock type");
    }
    if (rwstate.empty() && rwstate.recovery_read_marker) {
      rwstate.recovery_read_marker = false;
      *requeue_recovery = true;
    }
    if (rwstate.empty() && rwstate.snaptrimmer_write_marker) {
      rwstate.snaptrimmer_write_marker = false;
      *requeue_snaptrimmer = true;
    }
  }
  bool is_request_pending() {
    return (rwstate.count > 0);
  }
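  // Illustrative pairing of the type-dispatched helpers (not part of this
  // header): take the lock with get_lock_type() and release it with
  // put_lock_type(), then act on the returned hints:
  //
  //   if (obc->get_lock_type(op, ObjectContext::RWState::RWWRITE)) {
  //     ... mutate the object ...
  //     list<OpRequestRef> to_wake;
  //     bool requeue_recovery = false, requeue_snaptrimmer = false;
  //     obc->put_lock_type(ObjectContext::RWState::RWWRITE, &to_wake,
  //                        &requeue_recovery, &requeue_snaptrimmer);
  //     // requeue to_wake; kick recovery/snaptrim if the flags were set
  //   }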
  ObjectContext()
    : ssc(NULL),
      destructor_callback(0),
      lock("PrimaryLogPG::ObjectContext::lock"),
      unstable_writes(0), readers(0), writers_waiting(0), readers_waiting(0),
      blocked(false), requeue_scrub_on_unblock(false) {}
  ~ObjectContext() {
    assert(rwstate.empty());
    if (destructor_callback)
      destructor_callback->complete(0);
  }
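  // Illustrative note (not part of this header): the owner can hook context
  // teardown, e.g. to release resources it holds while the obc is cached:
  //
  //   obc->destructor_callback =
  //     new FunctionContext([](int) { /* release owner-held resources */ });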
  bool is_blocked() const {
    return blocked;
  }
  // do simple synchronous mutual exclusion, for now.  no waitqueues or anything fancy.
  void ondisk_write_lock() {
    lock.Lock();
    writers_waiting++;
    while (readers_waiting || readers)
      cond.Wait(lock);
    writers_waiting--;
    unstable_writes++;
    lock.Unlock();
  }
  void ondisk_write_unlock() {
    lock.Lock();
    assert(unstable_writes > 0);
    unstable_writes--;
    if (!unstable_writes && readers_waiting)
      cond.Signal();
    lock.Unlock();
  }
  void ondisk_read_lock() {
    lock.Lock();
    readers_waiting++;
    while (unstable_writes)
      cond.Wait(lock);
    readers_waiting--;
    readers++;
    lock.Unlock();
  }
  void ondisk_read_unlock() {
    lock.Lock();
    assert(readers > 0);
    readers--;
    if (!readers && writers_waiting)
      cond.Signal();
    lock.Unlock();
  }
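  // Illustrative use of the ondisk_* helpers (not part of this header): a
  // writer holds the "unstable" interval open while a transaction is in
  // flight, and on-disk readers are excluded for that interval:
  //
  //   obc->ondisk_write_lock();
  //   ... queue the transaction ...
  //   obc->ondisk_write_unlock();   // typically from the commit callback
  //
  //   obc->ondisk_read_lock();      // blocks while unstable_writes > 0
  //   ... read the on-disk object ...
  //   obc->ondisk_read_unlock();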
  /// in-progress copyfrom ops for this object

  bool requeue_scrub_on_unblock:1;    // true if we need to requeue scrub on unblock
};
inline ostream& operator<<(ostream& out, const ObjectState& obs)
{
  out << obs.oi.soid;
  if (!obs.exists)
    out << "(dne)";
  return out;
}

inline ostream& operator<<(ostream& out, const ObjectContext::RWState& rw)
{
  return out << "rwstate(" << rw.get_state_name()
             << " n=" << rw.count
             << " w=" << rw.waiters.size()
             << ")";
}

inline ostream& operator<<(ostream& out, const ObjectContext& obc)
{
  return out << "obc(" << obc.obs << " " << obc.rwstate << ")";
}
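// A minimal usage sketch for ObcLockManager below (illustrative, not part of
// this header): it records which ObjectContexts an operation has locked so
// that everything can be released, and waiters requeued, in one call:
//
//   ObcLockManager manager;
//   if (!manager.get_lock_type(ObjectContext::RWState::RWWRITE, hoid, obc, op))
//     return;  // op was queued on the obc; it is requeued when the lock drops
//   ... perform the operation ...
//   list<pair<hobject_t, list<OpRequestRef> > > to_requeue;
//   bool requeue_recovery = false, requeue_snaptrimmer = false;
//   manager.put_locks(&to_requeue, &requeue_recovery, &requeue_snaptrimmer);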
class ObcLockManager {
  struct ObjectLockState {
    ObjectContextRef obc;
    ObjectContext::RWState::State type;
    ObjectLockState(
      ObjectContextRef obc,
      ObjectContext::RWState::State type)
      : obc(obc), type(type) {}
  };
  map<hobject_t, ObjectLockState> locks;
public:
  ObcLockManager() = default;
  ObcLockManager(ObcLockManager &&) = default;
  ObcLockManager(const ObcLockManager &) = delete;
  ObcLockManager &operator=(ObcLockManager &&) = default;
  bool empty() const {
    return locks.empty();
  }
  bool get_lock_type(
    ObjectContext::RWState::State type,
    const hobject_t &hoid,
    ObjectContextRef obc,
    OpRequestRef op) {
    assert(locks.find(hoid) == locks.end());
    if (obc->get_lock_type(op, type)) {
      locks.insert(make_pair(hoid, ObjectLockState(obc, type)));
      return true;
    } else {
      return false;
    }
  }
  /// Get write lock, ignore starvation
  bool take_write_lock(
    const hobject_t &hoid,
    ObjectContextRef obc) {
    assert(locks.find(hoid) == locks.end());
    if (obc->rwstate.take_write_lock()) {
      locks.insert(
        make_pair(
          hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE)));
      return true;
    } else {
      return false;
    }
  }
  /// Get write lock for snap trim
  bool get_snaptrimmer_write(
    const hobject_t &hoid,
    ObjectContextRef obc,
    bool mark_if_unsuccessful) {
    assert(locks.find(hoid) == locks.end());
    if (obc->get_snaptrimmer_write(mark_if_unsuccessful)) {
      locks.insert(
        make_pair(
          hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE)));
      return true;
    } else {
      return false;
    }
  }
  /// Get write lock greedy
  bool get_write_greedy(
    const hobject_t &hoid,
    ObjectContextRef obc,
    OpRequestRef op) {
    assert(locks.find(hoid) == locks.end());
    if (obc->get_write_greedy(op)) {
      locks.insert(
        make_pair(
          hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE)));
      return true;
    } else {
      return false;
    }
  }
  /// try get read lock
  bool try_get_read_lock(
    const hobject_t &hoid,
    ObjectContextRef obc) {
    assert(locks.find(hoid) == locks.end());
    if (obc->try_get_read_lock()) {
      locks.insert(
        make_pair(
          hoid,
          ObjectLockState(obc, ObjectContext::RWState::RWREAD)));
      return true;
    } else {
      return false;
    }
  }
  void put_locks(
    list<pair<hobject_t, list<OpRequestRef> > > *to_requeue,
    bool *requeue_recovery,
    bool *requeue_snaptrimmer) {
    for (auto p: locks) {
      list<OpRequestRef> _to_requeue;
      p.second.obc->put_lock_type(
        p.second.type,
        &_to_requeue,
        requeue_recovery,
        requeue_snaptrimmer);
      if (to_requeue) {
        to_requeue->push_back(
          make_pair(
            p.second.obc->obs.oi.soid,
            std::move(_to_requeue)));
      }
    }
    locks.clear();
  }
  ~ObcLockManager() {
    assert(locks.empty());
  }
};

#endif