1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 #ifndef CEPH_SIMPLELOCK_H
17 #define CEPH_SIMPLELOCK_H
19 #include <boost/intrusive_ptr.hpp>
21 #include "MDSCacheObject.h"
22 #include "MDSContext.h"
// Map a CEPH_LOCK_* lock type constant to its short printable name
// ("dn", "ifile", ...). Any value without a case label reaches the default
// label, which calls ceph_abort(); the `return 0` after it presumably only
// satisfies the return type.
27 inline const char *get_lock_type_name(int t) {
29 case CEPH_LOCK_DN: return "dn";
30 case CEPH_LOCK_DVERSION: return "dversion";
31 case CEPH_LOCK_IVERSION: return "iversion";
32 case CEPH_LOCK_IFILE: return "ifile";
33 case CEPH_LOCK_IAUTH: return "iauth";
34 case CEPH_LOCK_ILINK: return "ilink";
35 case CEPH_LOCK_IDFT: return "idft";
36 case CEPH_LOCK_INEST: return "inest";
37 case CEPH_LOCK_IXATTR: return "ixattr";
38 case CEPH_LOCK_ISNAP: return "isnap";
39 case CEPH_LOCK_INO: return "ino";
40 case CEPH_LOCK_IFLOCK: return "iflock";
41 case CEPH_LOCK_IPOLICY: return "ipolicy";
42 default: ceph_abort(); return 0;
46 #include "include/memory.h"
49 typedef boost::intrusive_ptr<MutationImpl> MutationRef;
64 explicit LockType(int t) : type(t) {
69 case CEPH_LOCK_IXATTR:
71 case CEPH_LOCK_IFLOCK:
72 case CEPH_LOCK_IPOLICY:
82 case CEPH_LOCK_DVERSION:
83 case CEPH_LOCK_IVERSION:
// Map a LOCK_* state constant to its printable name. Names of the form
// "a->b" presumably denote a transition from state a to state b (TODO
// confirm against the state-machine tables). A value without a case label
// reaches the default label, which calls ceph_abort().
98 const char *get_state_name(int n) const {
100 case LOCK_UNDEF: return "UNDEF";
101 case LOCK_SYNC: return "sync";
102 case LOCK_LOCK: return "lock";
104 case LOCK_PREXLOCK: return "prexlock";
105 case LOCK_XLOCK: return "xlock";
106 case LOCK_XLOCKDONE: return "xlockdone";
107 case LOCK_XLOCKSNAP: return "xlocksnap";
108 case LOCK_LOCK_XLOCK: return "lock->xlock";
110 case LOCK_SYNC_LOCK: return "sync->lock";
111 case LOCK_LOCK_SYNC: return "lock->sync";
112 case LOCK_REMOTEXLOCK: return "remote_xlock";
113 case LOCK_EXCL: return "excl";
114 case LOCK_EXCL_SYNC: return "excl->sync";
115 case LOCK_EXCL_LOCK: return "excl->lock";
116 case LOCK_SYNC_EXCL: return "sync->excl";
117 case LOCK_LOCK_EXCL: return "lock->excl";
119 case LOCK_XSYN: return "xsyn";
120 case LOCK_XSYN_EXCL: return "xsyn->excl";
121 case LOCK_EXCL_XSYN: return "excl->xsyn";
122 case LOCK_XSYN_SYNC: return "xsyn->sync";
124 case LOCK_SYNC_MIX: return "sync->mix";
125 case LOCK_SYNC_MIX2: return "sync->mix(2)";
126 case LOCK_LOCK_TSYN: return "lock->tsyn";
128 case LOCK_MIX_LOCK: return "mix->lock";
129 case LOCK_MIX_LOCK2: return "mix->lock(2)";
130 case LOCK_MIX: return "mix";
131 case LOCK_MIX_TSYN: return "mix->tsyn";
133 case LOCK_TSYN_MIX: return "tsyn->mix";
134 case LOCK_TSYN_LOCK: return "tsyn->lock";
135 case LOCK_TSYN: return "tsyn";
137 case LOCK_MIX_SYNC: return "mix->sync";
138 case LOCK_MIX_SYNC2: return "mix->sync(2)";
139 case LOCK_EXCL_MIX: return "excl->mix";
140 case LOCK_MIX_EXCL: return "mix->excl";
142 case LOCK_PRE_SCAN: return "*->scan";
143 case LOCK_SCAN: return "scan";
145 case LOCK_SNAP_SYNC: return "snap->sync";
147 default: ceph_abort(); return 0;
// Wait-mask bits for a single lock. Callers in this class shift these masks
// left by get_wait_shift() before handing them to the parent object.
153 static const uint64_t WAIT_RD = (1<<0); // to read
154 static const uint64_t WAIT_WR = (1<<1); // to write
155 static const uint64_t WAIT_XLOCK = (1<<2); // to xlock (same bit value as WAIT_STABLE)
156 static const uint64_t WAIT_STABLE = (1<<2); // for a stable state
157 static const uint64_t WAIT_REMOTEXLOCK = (1<<3); // for a remote xlock
158 static const int WAIT_BITS = 4; // width of one lock's wait field; the flags above use bits 0..3
159 static const uint64_t WAIT_ALL = ((1<<WAIT_BITS)-1); // mask with all WAIT_BITS low bits set
163 // parent (what i lock)
164 MDSCacheObject *parent;
171 __s32 num_client_lease;
173 struct unstable_bits_t {
174 set<__s32> gather_set; // auth+rep. >= 0 is mds, < 0 is client
177 int num_wrlock, num_xlock;
178 MutationRef xlock_by;
179 client_t xlock_by_client;
180 client_t excl_client;
184 gather_set.empty() &&
187 xlock_by.get() == NULL &&
188 xlock_by_client == -1 &&
192 unstable_bits_t() : num_wrlock(0),
199 mutable std::unique_ptr<unstable_bits_t> _unstable;
// True when _unstable currently owns an unstable_bits_t object.
201 bool have_more() const { return static_cast<bool>(_unstable); }
// Return the unstable_bits_t owned by _unstable. The visible path installs a
// freshly allocated object with reset(); _unstable is declared `mutable`,
// which is what lets this const method replace it.
// NOTE(review): the allocation is presumably guarded by a null check on
// _unstable - confirm against the full method body.
202 unstable_bits_t *more() const {
204 _unstable.reset(new unstable_bits_t);
205 return _unstable.get();
// Tests whether the unstable state exists and reports empty().
// NOTE(review): presumably the unstable state is released in that case -
// confirm against the full method body.
207 void try_clear_more() {
208 if (_unstable && _unstable->empty()) {
// excl_client stored in the unstable state, or -1 when no unstable state
// exists. The have_more() check keeps more() from being called here when
// nothing has been allocated.
215 client_t get_excl_client() const {
216 return have_more() ? more()->excl_client : -1;
// Store c as excl_client in the unstable state. A negative c with no
// unstable state is a no-op: get_excl_client() already reports -1 in that
// situation, so more() does not need to be called just to store it.
218 void set_excl_client(client_t c) {
219 if (c < 0 && !have_more())
220 return; // default is -1
221 more()->excl_client = c;
224 SimpleLock(MDSCacheObject *o, LockType *lt) :
// Virtual destructor with no custom teardown logic.
231 virtual ~SimpleLock() = default;
233 virtual bool is_scatterlock() const {
236 virtual bool is_locallock() const {
// The MDSCacheObject stored in `parent`.
241 MDSCacheObject *get_parent() { return this->parent; }
// Numeric lock type, read from the LockType descriptor.
242 int get_type() const { return this->type->type; }
// State-machine table (sm) held by the LockType descriptor.
243 const sm_t* get_sm() const { return this->type->sm; }
// Bit offset of this lock type's wait field. Each listed lock type gets its
// own WAIT_BITS-wide slot, starting at bit 8 (CEPH_LOCK_DN) and advancing by
// WAIT_BITS per entry in the order below.
245 int get_wait_shift() const {
246 switch (get_type()) {
247 case CEPH_LOCK_DN: return 8;
248 case CEPH_LOCK_DVERSION: return 8 + 1*SimpleLock::WAIT_BITS;
249 case CEPH_LOCK_IAUTH: return 8 + 2*SimpleLock::WAIT_BITS;
250 case CEPH_LOCK_ILINK: return 8 + 3*SimpleLock::WAIT_BITS;
251 case CEPH_LOCK_IDFT: return 8 + 4*SimpleLock::WAIT_BITS;
252 case CEPH_LOCK_IFILE: return 8 + 5*SimpleLock::WAIT_BITS;
253 case CEPH_LOCK_IVERSION: return 8 + 6*SimpleLock::WAIT_BITS;
254 case CEPH_LOCK_IXATTR: return 8 + 7*SimpleLock::WAIT_BITS;
255 case CEPH_LOCK_ISNAP: return 8 + 8*SimpleLock::WAIT_BITS;
256 case CEPH_LOCK_INEST: return 8 + 9*SimpleLock::WAIT_BITS;
257 case CEPH_LOCK_IFLOCK: return 8 +10*SimpleLock::WAIT_BITS;
258 case CEPH_LOCK_IPOLICY: return 8 +11*SimpleLock::WAIT_BITS;
// Map the lock type to its CEPH_CAP_S* constant for the four lock types
// listed below (IAUTH, ILINK, IFILE, IXATTR).
264 int get_cap_shift() const {
265 switch (get_type()) {
266 case CEPH_LOCK_IAUTH: return CEPH_CAP_SAUTH;
267 case CEPH_LOCK_ILINK: return CEPH_CAP_SLINK;
268 case CEPH_LOCK_IFILE: return CEPH_CAP_SFILE;
269 case CEPH_LOCK_IXATTR: return CEPH_CAP_SXATTR;
// Mask with the low CEPH_CAP_FILE_BITS bits set for CEPH_LOCK_IFILE, and the
// low CEPH_CAP_SIMPLE_BITS bits set for every other lock type.
273 int get_cap_mask() const {
274 switch (get_type()) {
275 case CEPH_LOCK_IFILE: return (1 << CEPH_CAP_FILE_BITS) - 1;
276 default: return (1 << CEPH_CAP_SIMPLE_BITS) - 1;
// Ordering predicate over lock pointers. A lock whose type is not above
// CEPH_LOCK_DN sorts before one whose type is; when both fall on the same
// side of that split, locks are ordered by parent (is_lt), and locks sharing
// a parent are ordered by numeric lock type.
281 bool operator()(const SimpleLock* l, const SimpleLock* r) const {
282 // first sort by object type (dn < inode)
283 if (!(l->type->type > CEPH_LOCK_DN) && (r->type->type > CEPH_LOCK_DN)) return true;
284 if ((l->type->type > CEPH_LOCK_DN) == (r->type->type > CEPH_LOCK_DN)) {
285 // then sort by object
286 if (l->parent->is_lt(r->parent)) return true;
287 if (l->parent == r->parent) {
288 // then sort by (inode) lock type
289 if (l->type->type < r->type->type) return true;
// Forward to parent->decode_lock_state(), passing this lock's type and bl.
296 void decode_locked_state(bufferlist& bl) {
297 parent->decode_lock_state(type->type, bl);
// Forward to parent->encode_lock_state(), passing this lock's type and bl.
299 void encode_locked_state(bufferlist& bl) {
300 parent->encode_lock_state(type->type, bl);
// Pass mask, shifted into this lock type's wait field, to
// parent->finish_waiting() together with result code r (default 0).
302 void finish_waiters(uint64_t mask, int r=0) {
303 parent->finish_waiting(mask << get_wait_shift(), r);
// Pass mask, shifted into this lock type's wait field, to
// parent->take_waiting() along with the caller's list ls.
305 void take_waiting(uint64_t mask, list<MDSInternalContextBase*>& ls) {
306 parent->take_waiting(mask << get_wait_shift(), ls);
// Register context c with the parent under mask shifted into this lock
// type's wait field; MDSCacheObject::WAIT_ORDERED is always OR-ed in.
308 void add_waiter(uint64_t mask, MDSInternalContextBase *c) {
309 parent->add_waiter((mask << get_wait_shift()) | MDSCacheObject::WAIT_ORDERED, c);
// Ask the parent whether it has a waiter for mask, after shifting mask into
// this lock type's wait field.
311 bool is_waiter_for(uint64_t mask) const {
312 return parent->is_waiter_for(mask << get_wait_shift());
// Current value of the `state` member.
318 int get_state() const { return this->state; }
319 int set_state(int s) {
321 //assert(!is_stable() || gather_set.size() == 0); // gather should be empty in stable states.
// Rejoin-time state change. When the lock is unstable and the parent is
// auth, the parent is auth_unpin()ed; afterwards every waiter in this lock's
// wait field (WAIT_ALL) is moved to `waiters` via take_waiting().
// NOTE(review): the statements that store `s` are not among the visible
// lines - confirm where the state is assigned.
324 void set_state_rejoin(int s, list<MDSInternalContextBase*>& waiters) {
325 if (!is_stable() && get_parent()->is_auth()) {
327 get_parent()->auth_unpin(this);
332 take_waiting(SimpleLock::WAIT_ALL, waiters);
// A state counts as stable when its state-machine entry has no follow-up
// state, i.e. its `next` field is 0.
335 bool is_stable() const {
336 return get_sm()->states[state].next == 0;
338 bool is_unstable_and_locked() const {
341 return is_rdlocked() || is_wrlocked() || is_xlocked();
// The `next` field of the current state's state-machine entry (0 is what
// is_stable() treats as "stable").
343 int get_next_state() {
344 return get_sm()->states[state].next;
348 bool is_sync_and_unlocked() const {
350 get_state() == LOCK_SYNC &&
// True when the current state's can_rdlock rule is FW.
359 bool fw_rdlock_to_auth() {
360 return get_sm()->states[state].can_rdlock == FW;
// True when the current state's can_rdlock rule is REQ.
363 bool req_rdlock_from_auth() {
364 return get_sm()->states[state].can_rdlock == REQ;
368 static set<int32_t> empty_gather_set;
370 // int32_t: <0 is client, >=0 is MDS rank
// Read-only view of the gather set. When no unstable state exists, the
// shared static empty_gather_set is returned instead of calling more().
371 const set<int32_t>& get_gather_set() const {
372 return have_more() ? more()->gather_set : empty_gather_set;
// Insert the .first member of every element returned by
// parent->get_replicas() into the gather set. The element is bound by const
// reference so it is not copied on each iteration.
376 for (const auto& p : parent->get_replicas()) {
377 more()->gather_set.insert(p.first);
// True when unstable state exists and its gather set is non-empty.
380 bool is_gathering() const {
381 return have_more() && !more()->gather_set.empty();
// True when unstable state exists and its gather set contains i.
383 bool is_gathering(int32_t i) const {
384 return have_more() && more()->gather_set.count(i);
// Empty the gather set.
// NOTE(review): presumably guarded by a have_more() check so that more() is
// not called needlessly - confirm against the full method body.
386 void clear_gather() {
388 more()->gather_set.clear();
// Remove i from the gather set.
// NOTE(review): presumably guarded by a have_more() check so that more() is
// not called needlessly - confirm against the full method body.
390 void remove_gather(int32_t i) {
392 more()->gather_set.erase(i);
// Default implementations of the dirty/stale/flush hooks: this class always
// answers false and clear_flushed() does nothing. They are virtual so that
// derived lock classes can override them.
397 virtual bool is_dirty() const { return false; }
398 virtual bool is_stale() const { return false; }
399 virtual bool is_flushing() const { return false; }
400 virtual bool is_flushed() const { return false; }
401 virtual void clear_flushed() { }
// Evaluate the current state's can_lease rule for `client`:
//   ANY  - always true;
//   AUTH - true when the parent is auth;
//   XCL  - true only for a non-negative client that is the xlock_by_client.
404 bool can_lease(client_t client) const {
405 return get_sm()->states[state].can_lease == ANY ||
406 (get_sm()->states[state].can_lease == AUTH && parent->is_auth()) ||
407 (get_sm()->states[state].can_lease == XCL && client >= 0 && get_xlock_by_client() == client);
// Evaluate the current state's can_read rule for `client`:
//   ANY  - always true;
//   AUTH - true when the parent is auth;
//   XCL  - true only for a non-negative client that is the xlock_by_client.
409 bool can_read(client_t client) const {
410 return get_sm()->states[state].can_read == ANY ||
411 (get_sm()->states[state].can_read == AUTH && parent->is_auth()) ||
412 (get_sm()->states[state].can_read == XCL && client >= 0 && get_xlock_by_client() == client);
// Evaluate the current state's can_read_projected rule for `client`:
//   ANY  - always true;
//   AUTH - true when the parent is auth;
//   XCL  - true only for a non-negative client that is the xlock_by_client.
414 bool can_read_projected(client_t client) const {
415 return get_sm()->states[state].can_read_projected == ANY ||
416 (get_sm()->states[state].can_read_projected == AUTH && parent->is_auth()) ||
417 (get_sm()->states[state].can_read_projected == XCL && client >= 0 && get_xlock_by_client() == client);
// Evaluate the current state's can_rdlock rule for `client`:
//   ANY  - always true;
//   AUTH - true when the parent is auth;
//   XCL  - true only for a non-negative client that is the xlock_by_client.
// Other rule values (e.g. FW/REQ, tested elsewhere in this class) yield false.
419 bool can_rdlock(client_t client) const {
420 return get_sm()->states[state].can_rdlock == ANY ||
421 (get_sm()->states[state].can_rdlock == AUTH && parent->is_auth()) ||
422 (get_sm()->states[state].can_rdlock == XCL && client >= 0 && get_xlock_by_client() == client);
// Evaluate the current state's can_wrlock rule for `client`:
//   ANY  - always true;
//   AUTH - true when the parent is auth;
//   XCL  - true for a non-negative client that is either the xlock_by_client
//          or the excl_client.
424 bool can_wrlock(client_t client) const {
425 return get_sm()->states[state].can_wrlock == ANY ||
426 (get_sm()->states[state].can_wrlock == AUTH && parent->is_auth()) ||
427 (get_sm()->states[state].can_wrlock == XCL && client >= 0 && (get_xlock_by_client() == client ||
428 get_excl_client() == client));
// Evaluate the current state's can_force_wrlock rule for `client`:
//   ANY  - always true;
//   AUTH - true when the parent is auth;
//   XCL  - true for a non-negative client that is either the xlock_by_client
//          or the excl_client.
430 bool can_force_wrlock(client_t client) const {
431 return get_sm()->states[state].can_force_wrlock == ANY ||
432 (get_sm()->states[state].can_force_wrlock == AUTH && parent->is_auth()) ||
433 (get_sm()->states[state].can_force_wrlock == XCL && client >= 0 && (get_xlock_by_client() == client ||
434 get_excl_client() == client));
// Evaluate the current state's can_xlock rule for `client`:
//   ANY  - always true;
//   AUTH - true when the parent is auth;
//   XCL  - true only for a non-negative client that is the xlock_by_client.
436 bool can_xlock(client_t client) const {
437 return get_sm()->states[state].can_xlock == ANY ||
438 (get_sm()->states[state].can_xlock == AUTH && parent->is_auth()) ||
439 (get_sm()->states[state].can_xlock == XCL && client >= 0 && get_xlock_by_client() == client);
// True while the num_rdlock counter is positive.
443 bool is_rdlocked() const { return 0 < num_rdlock; }
446 parent->get(MDSCacheObject::PIN_LOCK);
450 assert(num_rdlock>0);
453 parent->put(MDSCacheObject::PIN_LOCK);
456 int get_num_rdlocks() const {
// Take one write lock. On the 0 -> 1 transition of num_wrlock the parent is
// pinned with MDSCacheObject::PIN_LOCK; the counter is then incremented.
// `force` is not consulted by the live code; it appears only in the
// commented-out assert.
461 void get_wrlock(bool force=false) {
462 //assert(can_wrlock() || force);
463 if (more()->num_wrlock == 0)
464 parent->get(MDSCacheObject::PIN_LOCK);
465 ++more()->num_wrlock;
468 --more()->num_wrlock;
469 if (more()->num_wrlock == 0) {
470 parent->put(MDSCacheObject::PIN_LOCK);
// True when unstable state exists and its num_wrlock counter is positive.
474 bool is_wrlocked() const {
475 return have_more() && more()->num_wrlock > 0;
// num_wrlock from the unstable state, or 0 when no unstable state exists.
477 int get_num_wrlocks() const {
478 return have_more() ? more()->num_wrlock : 0;
// Take the xlock on behalf of mutation `who` and client `client`.
// Preconditions (asserted): no mutation currently recorded in xlock_by, and
// the state is LOCK_XLOCK or LOCK_LOCK, or this is a locallock.
// The parent is pinned with PIN_LOCK and the holder is recorded in the
// unstable state.
// NOTE(review): one statement between the pin and the holder assignment is
// absent from this listing (presumably the num_xlock update) - confirm.
482 void get_xlock(MutationRef who, client_t client) {
483 assert(get_xlock_by() == MutationRef());
484 assert(state == LOCK_XLOCK || is_locallock() ||
485 state == LOCK_LOCK /* if we are a slave */);
486 parent->get(MDSCacheObject::PIN_LOCK);
488 more()->xlock_by = who;
489 more()->xlock_by_client = client;
// Mark the xlock as done. Preconditions (asserted): a holder is recorded in
// xlock_by, and the state is LOCK_XLOCK or LOCK_LOCK, or this is a locallock.
// The state is set to LOCK_XLOCKDONE and the xlock_by reference is dropped;
// xlock_by_client is left untouched here.
// NOTE(review): a line directly before the state assignment is absent from
// this listing, so the assignment may be conditional - confirm.
491 void set_xlock_done() {
492 assert(more()->xlock_by);
493 assert(state == LOCK_XLOCK || is_locallock() ||
494 state == LOCK_LOCK /* if we are a slave */);
496 state = LOCK_XLOCKDONE;
497 more()->xlock_by.reset();
500 assert(state == LOCK_XLOCK || state == LOCK_XLOCKDONE ||
501 state == LOCK_XLOCKSNAP || is_locallock() ||
502 state == LOCK_LOCK /* if we are a master of a slave */);
504 parent->put(MDSCacheObject::PIN_LOCK);
505 if (more()->num_xlock == 0) {
506 more()->xlock_by.reset();
507 more()->xlock_by_client = -1;
// True when unstable state exists and its num_xlock counter is positive.
511 bool is_xlocked() const {
512 return have_more() && more()->num_xlock > 0;
// num_xlock from the unstable state, or 0 when no unstable state exists.
514 int get_num_xlocks() const {
515 return have_more() ? more()->num_xlock : 0;
// xlock_by_client from the unstable state, or -1 when no unstable state
// exists.
517 client_t get_xlock_by_client() const {
518 return have_more() ? more()->xlock_by_client : -1;
// True when unstable state exists and its xlock_by_client equals c; false
// when there is no unstable state, regardless of c.
520 bool is_xlocked_by_client(client_t c) const {
521 return have_more() ? more()->xlock_by_client == c : false;
// Mutation recorded in xlock_by, or a default-constructed (empty)
// MutationRef when no unstable state exists. Returned by value.
523 MutationRef get_xlock_by() const {
524 return have_more() ? more()->xlock_by : MutationRef();
528 void get_client_lease() {
531 void put_client_lease() {
532 assert(num_client_lease > 0);
534 if (num_client_lease == 0) {
// True while the num_client_lease counter is positive.
538 bool is_leased() const {
539 return num_client_lease > 0;
// Current value of the num_client_lease counter.
541 int get_num_client_lease() const {
542 return num_client_lease;
// True when the lock is xlocked, rdlocked or wrlocked, or when
// num_client_lease is non-zero.
545 bool is_used() const {
546 return is_xlocked() || is_rdlocked() || is_wrlocked() || num_client_lease;
550 void encode(bufferlist& bl) const {
551 ENCODE_START(2, 2, bl);
554 ::encode(more()->gather_set, bl);
556 ::encode(empty_gather_set, bl);
559 void decode(bufferlist::iterator& p) {
565 more()->gather_set.swap(g);
568 void encode_state_for_replica(bufferlist& bl) const {
569 __s16 s = get_replica_state();
572 void decode_state(bufferlist::iterator& p, bool is_new=true) {
578 void decode_state_rejoin(bufferlist::iterator& p, list<MDSInternalContextBase*>& waiters) {
581 set_state_rejoin(s, waiters);
// The `loner` flag of the current state's state-machine entry.
586 bool is_loner_mode() const {
587 return get_sm()->states[state].loner;
// The state machine's allowed_ever_auth value when the parent is auth,
// otherwise its allowed_ever_replica value.
589 int gcaps_allowed_ever() const {
590 return parent->is_auth() ? get_sm()->allowed_ever_auth : get_sm()->allowed_ever_replica;
// Caps value taken from the state-machine entry for state s, selected by
// `who`. A negative s (the default) means "use the current state".
// When the parent is auth:
//   - who == CAP_XLOCKER while some client holds the xlock:
//     xlocker_caps | caps;
//   - who == CAP_ANY while in loner mode: caps;
//   - the remaining visible return yields loner_caps | caps.
// The final return yields replica_caps.
// NOTE(review): is_loner_mode() reads the live `state`, not `s`, so an
// explicit s is combined with the current state's loner flag - confirm that
// this is intended.
592 int gcaps_allowed(int who, int s=-1) const {
593 if (s < 0) s = state;
594 if (parent->is_auth()) {
595 if (get_xlock_by_client() >= 0 && who == CAP_XLOCKER)
596 return get_sm()->states[s].xlocker_caps | get_sm()->states[s].caps; // xlocker always gets more
597 else if (is_loner_mode() && who == CAP_ANY)
598 return get_sm()->states[s].caps;
600 return get_sm()->states[s].loner_caps | get_sm()->states[s].caps; // loner always gets more
602 return get_sm()->states[s].replica_caps;
604 int gcaps_careful() const {
605 if (get_num_wrlocks())
606 return get_sm()->careful;
611 int gcaps_xlocker_mask(client_t client) const {
612 if (client == get_xlock_by_client())
613 return type->type == CEPH_LOCK_IFILE ? 0xf : (CEPH_CAP_GSHARED|CEPH_CAP_GEXCL);
617 // simplelock specifics
// The replica_state field of the current state's state-machine entry.
618 int get_replica_state() const {
619 return get_sm()->states[state].replica_state;
621 void export_twiddle() {
623 state = get_replica_state();
627 * called on first replica creation.
629 void replicate_relax() {
630 assert(parent->is_auth());
631 assert(!parent->is_replicated());
632 if (state == LOCK_LOCK && !is_used())
635 bool remove_replica(int from) {
636 if (is_gathering(from)) {
643 bool do_import(int from, int to) {
650 if (!is_stable() && !is_gathering())
655 void _print(ostream& out) const {
656 out << get_lock_type_name(get_type()) << " ";
657 out << get_state_name(get_state());
658 if (!get_gather_set().empty())
659 out << " g=" << get_gather_set();
660 if (num_client_lease)
661 out << " l=" << num_client_lease;
663 out << " r=" << get_num_rdlocks();
665 out << " w=" << get_num_wrlocks();
667 out << " x=" << get_num_xlocks();
669 out << " by " << get_xlock_by();
679 * Write bare values (caller must be in an object section)
680 * to formatter, or nothing if is_sync_and_unlocked.
682 void dump(Formatter *f) const;
684 virtual void print(ostream& out) const {
690 WRITE_CLASS_ENCODER(SimpleLock)
692 inline ostream& operator<<(ostream& out, const SimpleLock& l)