1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 #ifndef DBOBJECTMAP_DB_H
3 #define DBOBJECTMAP_DB_H
5 #include "include/buffer_fwd.h"
11 #include "include/memory.h"
12 #include <boost/scoped_ptr.hpp>
14 #include "os/ObjectMap.h"
15 #include "kv/KeyValueDB.h"
16 #include "osd/osd_types.h"
17 #include "common/Mutex.h"
18 #include "common/Cond.h"
19 #include "common/simple_cache.hpp"
20 #include <boost/optional/optional_io.hpp>
22 #include "SequencerPosition.h"
25 * DBObjectMap: Implements ObjectMap in terms of KeyValueDB
27 * Prefix space structure:
29 * @see complete_prefix
33 * - HOBJECT_TO_SEQ: Contains leaf mapping from ghobject_t->header.seq and
34 * corresponding omap header
35 * - SYS_PREFIX: GLOBAL_STATE_KEY - contains next seq number
39 * @see generate_new_header
40 * - USER_PREFIX + header_key(header->seq) + USER_PREFIX
41 * : key->value for header->seq
42 * - USER_PREFIX + header_key(header->seq) + COMPLETE_PREFIX: see below
43 * - USER_PREFIX + header_key(header->seq) + XATTR_PREFIX: xattrs
44 * - USER_PREFIX + header_key(header->seq) + SYS_PREFIX
45 * : USER_HEADER_KEY - omap header for header->seq
46 * : HEADER_KEY - encoding of header for header->seq
48 * For each node (represented by a header), we
49 * store three mappings: the key mapping, the complete mapping, and the parent.
50 * The complete mapping (COMPLETE_PREFIX space) is key->key. Each x->y entry in
51 * this mapping indicates that the key mapping contains all entries on [x,y).
52 * Note, max string is represented by "", so ""->"" indicates that the parent
53 * is unnecessary (@see rm_keys). When looking up a key not contained in
54 * the complete set, we have to check the parent if we don't find it in the
55 * key set. During rm_keys, we copy keys from the parent and update the
56 * complete set to reflect the change @see rm_keys.
58 class DBObjectMap : public ObjectMap {
60 boost::scoped_ptr<KeyValueDB> db;
63 * Serializes access to next_seq as well as the in_use set
70 * Set of headers currently in use
73 set<ghobject_t> map_header_in_use;
76 * Takes the map_header_in_use entry in constructor, releases in
81 boost::optional<ghobject_t> locked;
83 MapHeaderLock(const MapHeaderLock &);
84 MapHeaderLock &operator=(const MapHeaderLock &);
/// Bind the lock holder to db without taking any object entry: only the db
/// member is initialised here, so the optional `locked` stays unset.
86 explicit MapHeaderLock(DBObjectMap *db) : db(db) {}
87 MapHeaderLock(DBObjectMap *db, const ghobject_t &oid) : db(db), locked(oid) {
88 Mutex::Locker l(db->header_lock);
89 while (db->map_header_in_use.count(*locked))
90 db->map_header_cond.Wait(db->header_lock);
91 db->map_header_in_use.insert(*locked);
94 const ghobject_t &get_locked() const {
99 void swap(MapHeaderLock &o) {
102 // centos6's boost optional doesn't seem to have swap :(
103 boost::optional<ghobject_t> _locked = o.locked;
110 Mutex::Locker l(db->header_lock);
111 assert(db->map_header_in_use.count(*locked));
112 db->map_header_cond.Signal();
113 db->map_header_in_use.erase(*locked);
118 DBObjectMap(CephContext* cct, KeyValueDB *db)
119 : ObjectMap(cct), db(db), header_lock("DBOBjectMap"),
120 cache_lock("DBObjectMap::CacheLock"),
121 caches(cct->_conf->filestore_omap_header_cache_size)
125 const ghobject_t &oid,
126 const map<string, bufferlist> &set,
127 const SequencerPosition *spos=0
131 const ghobject_t &oid,
132 const bufferlist &bl,
133 const SequencerPosition *spos=0
137 const ghobject_t &oid,
142 const ghobject_t &oid,
143 const SequencerPosition *spos=0
146 int clear_keys_header(
147 const ghobject_t &oid,
148 const SequencerPosition *spos=0
152 const ghobject_t &oid,
153 const set<string> &to_clear,
154 const SequencerPosition *spos=0
158 const ghobject_t &oid,
160 map<string, bufferlist> *out
164 const ghobject_t &oid,
169 const ghobject_t &oid,
170 const set<string> &keys,
171 map<string, bufferlist> *out
175 const ghobject_t &oid,
176 const set<string> &keys,
181 const ghobject_t &oid,
182 const set<string> &to_get,
183 map<string, bufferlist> *out
187 const ghobject_t &oid,
192 const ghobject_t &oid,
193 const map<string, bufferlist> &to_set,
194 const SequencerPosition *spos=0
198 const ghobject_t &oid,
199 const set<string> &to_remove,
200 const SequencerPosition *spos=0
204 const ghobject_t &oid,
205 const ghobject_t &target,
206 const SequencerPosition *spos=0
210 const ghobject_t &from,
211 const ghobject_t &to,
212 const SequencerPosition *spos=0
216 const ghobject_t &oid,
217 const ghobject_t &target,
218 const SequencerPosition *spos=0
221 /// Read initial state from backing store
223 /// Write current state settings to DB
225 /// Read initial state and upgrade or initialize state
226 int init(bool upgrade = false);
228 /// Upgrade store to current version
231 /// Consistency check, debug, there must be no parallel writes
232 int check(std::ostream &out, bool repair = false, bool force = false) override;
234 /// Ensure that all previous operations are durable
235 int sync(const ghobject_t *oid=0, const SequencerPosition *spos=0) override;
237 void compact() override {
242 /// Util, get all objects, there must be no other concurrent access
243 int list_objects(vector<ghobject_t> *objs ///< [out] objects
247 /// Util, get all object headers, there must be no other concurrent access
248 int list_object_headers(vector<_Header> *out ///< [out] headers
251 ObjectMapIterator get_iterator(const ghobject_t &oid) override;
253 static const string USER_PREFIX;
254 static const string XATTR_PREFIX;
255 static const string SYS_PREFIX;
256 static const string COMPLETE_PREFIX;
257 static const string HEADER_KEY;
258 static const string USER_HEADER_KEY;
259 static const string GLOBAL_STATE_KEY;
260 static const string HOBJECT_TO_SEQ;
263 static const string LEAF_PREFIX;
264 static const string REVERSE_LEAF_PREFIX;
266 /// persistent state for store @see generate_header
268 static const __u8 CUR_VERSION = 3;
271 // legacy is false when complete regions never used
/// Default state: v = 0, seq = 1, legacy = false.
273 State() : v(0), seq(1), legacy(false) {}
/// State starting from the caller-supplied seq; v = 0, legacy = false.
/// Marked explicit so a bare uint64_t never converts to a State implicitly.
274 explicit State(uint64_t seq) : v(0), seq(seq), legacy(false) {}
276 void encode(bufferlist &bl) const {
277 ENCODE_START(3, 1, bl);
280 ::encode(legacy, bl);
284 void decode(bufferlist::iterator &bl) {
292 ::decode(legacy, bl);
298 void dump(Formatter *f) const {
299 f->dump_unsigned("v", v);
300 f->dump_unsigned("seq", seq);
301 f->dump_bool("legacy", legacy);
304 static void generate_test_instances(list<State*> &o) {
305 o.push_back(new State(0));
306 o.push_back(new State(20));
313 uint64_t num_children;
317 SequencerPosition spos;
319 void encode(bufferlist &bl) const {
321 ENCODE_START(2, 1, bl);
323 ::encode(parent, bl);
324 ::encode(num_children, bl);
325 ::encode(unused, bl);
331 void decode(bufferlist::iterator &bl) {
335 ::decode(parent, bl);
336 ::decode(num_children, bl);
337 ::decode(unused, bl);
344 void dump(Formatter *f) const {
345 f->dump_unsigned("seq", seq);
346 f->dump_unsigned("parent", parent);
347 f->dump_unsigned("num_children", num_children);
348 f->dump_stream("oid") << oid;
351 static void generate_test_instances(list<_Header*> &o) {
352 o.push_back(new _Header);
353 o.push_back(new _Header);
354 o.back()->parent = 20;
358 _Header() : seq(0), parent(0), num_children(1) {}
361 /// String munging (public for testing)
362 static string ghobject_key(const ghobject_t &oid);
363 static string ghobject_key_v0(coll_t c, const ghobject_t &oid);
364 static int is_buggy_ghobject_key_v1(CephContext* cct,
367 /// Implicit lock on Header->seq
368 typedef ceph::shared_ptr<_Header> Header;
370 SimpleLRU<ghobject_t, _Header> caches;
372 string map_header_key(const ghobject_t &oid);
373 string header_key(uint64_t seq);
374 string complete_prefix(Header header);
375 string user_prefix(Header header);
376 string sys_prefix(Header header);
377 string xattr_prefix(Header header);
378 string sys_parent_prefix(_Header header);
379 string sys_parent_prefix(Header header) {
380 return sys_parent_prefix(*header);
383 class EmptyIteratorImpl : public ObjectMapIteratorImpl {
/// Nothing to position on; always returns 0.
385 int seek_to_first() override { return 0; }
/// Always returns 0.
/// NOTE(review): unlike its siblings this is not marked override --
/// presumably the base class declares no seek_to_last(); confirm before adding it.
386 int seek_to_last() { return 0; }
/// Ignores `after`; always returns 0.
387 int upper_bound(const string &after) override { return 0; }
/// Ignores `to`; always returns 0.
388 int lower_bound(const string &to) override { return 0; }
/// The empty iterator never points at an entry.
389 bool valid() override { return false; }
/// Advancing is a caller error: calls ceph_abort(). The return that follows
/// only satisfies the signature (assuming ceph_abort() does not return).
390 int next(bool validate=true) override { ceph_abort(); return 0; }
/// Reading a key is a caller error: calls ceph_abort() before the dummy return.
391 string key() override { ceph_abort(); return ""; }
/// Reading a value is a caller error: calls ceph_abort() before the dummy return.
392 bufferlist value() override { ceph_abort(); return bufferlist(); }
/// Always reports 0.
393 int status() override { return 0; }
398 class DBObjectMapIteratorImpl : public ObjectMapIteratorImpl {
402 /// NOTE: implicit lock hlock->get_locked() when returned out of the class
404 /// NOTE: implicit lock on header->seq AND for all ancestors
407 /// parent_iter == NULL iff no parent
408 ceph::shared_ptr<DBObjectMapIteratorImpl> parent_iter;
409 KeyValueDB::Iterator key_iter;
410 KeyValueDB::Iterator complete_iter;
412 /// cur_iter points to currently valid iterator
413 ceph::shared_ptr<ObjectMapIteratorImpl> cur_iter;
416 /// init() called, key_iter, complete_iter, parent_iter filled in
/// Bind the iterator to `map` and `header`. hlock is initialised from `map`
/// alone, and the iterator starts with r = 0, ready = false, invalid = true.
421 DBObjectMapIteratorImpl(DBObjectMap *map, Header header) :
422 map(map), hlock(map), header(header), r(0), ready(false), invalid(true) {}
423 int seek_to_first() override;
425 int upper_bound(const string &after) override;
426 int lower_bound(const string &to) override;
427 bool valid() override;
428 int next(bool validate=true) override;
429 string key() override;
430 bufferlist value() override;
431 int status() override;
434 return cur_iter == parent_iter;
437 /// skips to next valid parent entry
440 /// first parent() >= to
441 int lower_bound_parent(const string &to);
444 * Tests whether to_test is in complete region
446 * postcondition: complete_iter will be max s.t. complete_iter->value > to_test
448 int in_complete_region(const string &to_test, ///< [in] key to test
449 string *begin, ///< [out] beginning of region
450 string *end ///< [out] end of region
451 ); ///< @returns true if to_test is in the complete region, else false
459 typedef ceph::shared_ptr<DBObjectMapIteratorImpl> DBObjectMapIterator;
460 DBObjectMapIterator _get_iterator(Header header) {
461 return std::make_shared<DBObjectMapIteratorImpl>(this, header);
466 /// Removes node corresponding to header
467 void clear_header(Header header, KeyValueDB::Transaction t);
469 /// Set node containing input to new contents
470 void set_header(Header input, KeyValueDB::Transaction t);
472 /// Remove leaf node corresponding to oid in c
473 void remove_map_header(
474 const MapHeaderLock &l,
475 const ghobject_t &oid,
477 KeyValueDB::Transaction t);
479 /// Set leaf node for oid to the value of header
481 const MapHeaderLock &l,
482 const ghobject_t &oid, _Header header,
483 KeyValueDB::Transaction t);
485 /// Check the header for oid against spos (presumably true when the op at spos was already applied -- TODO confirm)
486 bool check_spos(const ghobject_t &oid,
488 const SequencerPosition *spos);
490 /// Lookup or create header for oid
491 Header lookup_create_map_header(
492 const MapHeaderLock &l,
493 const ghobject_t &oid,
494 KeyValueDB::Transaction t);
497 * Generate new header for oid with new seq number
499 * Has the side effect of synchronously saving the new DBObjectMap state
501 Header _generate_new_header(const ghobject_t &oid, Header parent);
502 Header generate_new_header(const ghobject_t &oid, Header parent) {
503 Mutex::Locker l(header_lock);
504 return _generate_new_header(oid, parent);
507 /// Lookup leaf header for oid
508 Header _lookup_map_header(
509 const MapHeaderLock &l,
510 const ghobject_t &oid);
511 Header lookup_map_header(
512 const MapHeaderLock &l2,
513 const ghobject_t &oid) {
514 Mutex::Locker l(header_lock);
515 return _lookup_map_header(l2, oid);
518 /// Lookup header node for input
519 Header lookup_parent(Header input);
523 int _get_header(Header header, bufferlist *bl);
525 /// Scan keys in header into out_keys and out_values (if nonnull)
526 int scan(Header header,
527 const set<string> &in_keys,
528 set<string> *out_keys,
529 map<string, bufferlist> *out_values);
531 /// Remove header and all related prefixes
532 int _clear(Header header,
533 KeyValueDB::Transaction t);
535 /* Scan complete region bumping *begin to the beginning of any
536 * containing region and adding all complete region keys between
537 * the updated begin and end to the complete_keys_to_remove set */
538 int merge_new_complete(DBObjectMapIterator &iter,
541 set<string> *complete_keys_to_remove);
543 /// Writes out State (mainly next_seq)
544 int write_state(KeyValueDB::Transaction _t =
545 KeyValueDB::Transaction());
547 /// Copies header entry from parent @see rm_keys
548 int copy_up_header(Header header,
549 KeyValueDB::Transaction t);
551 /// Sets header @see set_header
552 void _set_header(Header header, const bufferlist &bl,
553 KeyValueDB::Transaction t);
556 * Removes header seq lock and possibly object lock
557 * once Header is out of scope
559 * @see generate_new_header
561 class RemoveOnDelete {
564 explicit RemoveOnDelete(DBObjectMap *db) :
566 void operator() (_Header *header) {
567 Mutex::Locker l(db->header_lock);
568 assert(db->in_use.count(header->seq));
569 db->in_use.erase(header->seq);
570 db->header_cond.Signal();
574 friend class RemoveOnDelete;
576 WRITE_CLASS_ENCODER(DBObjectMap::_Header)
577 WRITE_CLASS_ENCODER(DBObjectMap::State)
579 ostream& operator<<(ostream& out, const DBObjectMap::_Header& h);