+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#include "SnapRealm.h"
-#include "MDCache.h"
-#include "MDSRank.h"
-
-#include "messages/MClientSnap.h"
-
-
-/*
- * SnapRealm
- */
-
-#define dout_context g_ceph_context
-#define dout_subsys ceph_subsys_mds
-#undef dout_prefix
-#define dout_prefix _prefix(_dout, mdcache->mds->get_nodeid(), inode, srnode.seq, this)
-// Write the per-line log prefix used by dout() in this file: the mds id,
-// the realm inode's number, the given seq and the realm's address.
-static ostream& _prefix(std::ostream *_dout, int whoami, const CInode *inode,
-                        uint64_t seq, const SnapRealm *realm)
-{
-  std::ostream& os = *_dout;
-  os << " mds." << whoami << ".cache.snaprealm(" << inode->ino();
-  os << " seq " << seq << " " << realm << ") ";
-  return os;
-}
-
-// Print a one-line summary of a realm: inode number, seq, last_created,
-// created, current_parent_since (only when it differs from created), the
-// realm's own snaps, any past_parents as "first-last=ino", and the address.
-ostream& operator<<(ostream& out, const SnapRealm& realm)
-{
-  const auto& sr = realm.srnode;
-
-  out << "snaprealm(" << realm.inode->ino()
-      << " seq " << sr.seq
-      << " lc " << sr.last_created
-      << " cr " << sr.created;
-  if (sr.created != sr.current_parent_since)
-    out << " cps " << sr.current_parent_since;
-  out << " snaps=" << sr.snaps;
-
-  if (!sr.past_parents.empty()) {
-    out << " past_parents=(";
-    bool first_entry = true;
-    for (const auto& pp : sr.past_parents) {
-      if (!first_entry)
-        out << ",";
-      first_entry = false;
-      // key is the last snapid of the interval, value carries first + ino
-      out << pp.second.first << "-" << pp.first
-          << "=" << pp.second.ino;
-    }
-    out << ")";
-  }
-
-  out << " " << &realm << ")";
-  return out;
-}
-
-
-// Record that past parent realm `parent` is open for the interval ending at
-// snapid `last`.  The first interval registered for a given parent also links
-// this realm into parent->open_past_children and pins the parent's inode.
-void SnapRealm::add_open_past_parent(SnapRealm *parent, snapid_t last)
-{
-  const inodeno_t parent_ino = parent->inode->ino();
-  auto existing = open_past_parents.find(parent_ino);
-
-  if (existing == open_past_parents.end()) {
-    // first interval for this parent: create the entry and take the pin
-    auto& entry = open_past_parents[parent_ino];
-    entry.first = parent;
-    entry.second.insert(last);
-    parent->open_past_children.insert(this);
-    parent->inode->get(CInode::PIN_PASTSNAPPARENT);
-  } else {
-    // parent already tracked: the same interval must not be added twice
-    auto& lasts = existing->second.second;
-    assert(lasts.count(last) == 0);
-    lasts.insert(last);
-  }
-
-  ++num_open_past_parents;
-}
-
-// Forget the open past-parent interval (ino, last).  Both the parent entry
-// and the interval must exist.  When the parent's last interval goes away,
-// unlink this realm from the parent and drop the inode pin.
-void SnapRealm::remove_open_past_parent(inodeno_t ino, snapid_t last)
-{
-  auto entry = open_past_parents.find(ino);
-  assert(entry != open_past_parents.end());
-
-  auto& lasts = entry->second.second;
-  auto hit = lasts.find(last);
-  assert(hit != lasts.end());
-  lasts.erase(hit);
-  --num_open_past_parents;
-
-  if (!lasts.empty())
-    return;
-
-  // that was the final interval for this parent; grab the realm pointer
-  // before the map entry (and the `lasts` reference) is destroyed
-  SnapRealm *old_parent = entry->second.first;
-  open_past_parents.erase(entry);
-  old_parent->open_past_children.erase(this);
-  old_parent->inode->put(CInode::PIN_PASTSNAPPARENT);
-}
-
-/*
- * Completion context handed to MDCache::open_ino() by
- * SnapRealm::_open_parents() when a past parent inode is not in cache.
- *
- *  sr           realm whose parents are being opened
- *  first,last   snapid range passed back into sr->_open_parents() on retry
- *  parent_last  key of the srnode.past_parents entry that was being opened
- *  parent       inode number of that past parent
- *  fin          caller's continuation, completed with 0 once the retry
- *               reports that everything is open
- *
- * The realm's inode is pinned with PIN_OPENINGSNAPPARENTS from construction
- * until finish() has run.
- */
-struct C_SR_RetryOpenParents : public MDSInternalContextBase {
-  SnapRealm *sr;
-  snapid_t first, last, parent_last;
-  inodeno_t parent;
-  MDSInternalContextBase* fin;
-  C_SR_RetryOpenParents(SnapRealm *s, snapid_t f, snapid_t l, snapid_t pl,
-                        inodeno_t p, MDSInternalContextBase *c) :
-    sr(s), first(f), last(l), parent_last(pl), parent(p), fin(c) {
-    sr->inode->get(CInode::PIN_OPENINGSNAPPARENTS);
-  }
-  MDSRank *get_mds() override { return sr->mdcache->mds; }
-  void finish(int r) override {
-    // a negative result means the past parent could not be opened: drop the
-    // corresponding past_parents entry before retrying
-    if (r < 0)
-      sr->_remove_missing_parent(parent_last, parent, r);
-    if (sr->_open_parents(fin, first, last)) {
-      // NOTE(review): fin is whatever pointer the caller gave _open_parents();
-      // nothing here guarantees it is non-null, so guard the dereference.
-      if (fin)
-        fin->complete(0);
-    }
-    sr->inode->put(CInode::PIN_OPENINGSNAPPARENTS);
-  }
-};
-
-// Drop the srnode.past_parents entry keyed by `snapid`, if there is one.
-// `parent` and `err` are only used for logging.
-void SnapRealm::_remove_missing_parent(snapid_t snapid, inodeno_t parent, int err)
-{
-  auto it = srnode.past_parents.find(snapid);
-  if (it == srnode.past_parents.end()) {
-    dout(10) << __func__ << " " << parent << " not found" << dendl;
-    return;
-  }
-
-  dout(10) << __func__ << " " << parent << " [" << it->second.first << ","
-           << it->first << "] errno " << err << dendl;
-  srnode.past_parents.erase(it);
-}
-
-// Try to open the current parent's ancestry (for the part of [first,last]
-// during which it has been our parent) and every past parent realm.
-//
-// Returns true once everything is open (and latches `open`).  Returns false
-// when a nested call reported not-ready, or when a past parent inode was not
-// in cache; in the latter case an open_ino() is started with a
-// C_SR_RetryOpenParents that carries `finish`.
-bool SnapRealm::_open_parents(MDSInternalContextBase *finish, snapid_t first, snapid_t last)
-{
-  dout(10) << "open_parents [" << first << "," << last << "]" << dendl;
-  if (open)
-    return true;
-
-  // current parent first: its own parents must be open too
-  if (parent) {
-    dout(10) << " current parent [" << srnode.current_parent_since << ",head] is " << *parent
-             << " on " << *parent->inode << dendl;
-    if (last >= srnode.current_parent_since) {
-      const bool parent_ready =
-        parent->_open_parents(finish, MAX(first, srnode.current_parent_since), last);
-      if (!parent_ready)
-        return false;
-    }
-  }
-
-  // then every past parent that is not yet registered as open
-  assert(srnode.past_parents.size() >= num_open_past_parents);
-  if (srnode.past_parents.size() > num_open_past_parents) {
-    auto it = srnode.past_parents.begin();
-    while (it != srnode.past_parents.end()) {
-      const snapid_t pp_last = it->first;
-      const snaplink_t& link = it->second;
-      dout(10) << " past_parent [" << link.first << "," << pp_last << "] is "
-               << link.ino << dendl;
-
-      CInode *past_in = mdcache->get_inode(link.ino);
-      if (!past_in) {
-        // not in cache: fetch it and come back through the retry context
-        C_SR_RetryOpenParents *retry = new C_SR_RetryOpenParents(this, first, last, pp_last,
-                                                                 link.ino, finish);
-        mdcache->open_ino(link.ino, mdcache->mds->mdsmap->get_metadata_pool(), retry);
-        return false;
-      }
-
-      if (past_in->state_test(CInode::STATE_PURGING)) {
-        dout(10) << " skip purging past_parent " << *past_in << dendl;
-        // `link` refers into the erased node; it is not used past this point
-        it = srnode.past_parents.erase(it);
-        continue;
-      }
-
-      assert(past_in->snaprealm);  // hmm!
-      if (!past_in->snaprealm->_open_parents(finish, link.first, pp_last))
-        return false;
-
-      auto opened = open_past_parents.find(link.ino);
-      const bool already_registered =
-        opened != open_past_parents.end() &&
-        opened->second.second.count(pp_last) != 0;
-      if (!already_registered)
-        add_open_past_parent(past_in->snaprealm, pp_last);
-
-      ++it;
-    }
-  }
-
-  open = true;
-  return true;
-}
-
-// Public wrapper around _open_parents().  When everything is already open the
-// context is not needed, so it is deleted here and true is returned;
-// otherwise false is returned and the context is left alone.
-bool SnapRealm::open_parents(MDSInternalContextBase *retryorfinish) {
-  const bool all_open = _open_parents(retryorfinish);
-  if (all_open)
-    delete retryorfinish;
-  return all_open;
-}
-
-// Check, without opening anything, whether every past parent whose interval
-// intersects [first,last] is registered in open_past_parents and itself
-// reports its past parents open.  On success `open` is set.
-bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last)
-{
-  dout(10) << "have_past_parents_open [" << first << "," << last << "]" << dendl;
-  if (open)
-    return true;
-
-  auto it = srnode.past_parents.lower_bound(first);
-  for (; it != srnode.past_parents.end(); ++it) {
-    const snaplink_t& link = it->second;
-    if (link.first > last)
-      break;
-    dout(10) << " past parent [" << link.first << "," << it->first << "] was "
-             << link.ino << dendl;
-
-    auto opened = open_past_parents.find(link.ino);
-    if (opened == open_past_parents.end()) {
-      dout(10) << " past parent " << link.ino << " is not open" << dendl;
-      return false;
-    }
-
-    // recurse with the range clipped to the time it was our parent
-    SnapRealm *parent_realm = opened->second.first;
-    const bool ancestors_open =
-      parent_realm->have_past_parents_open(MAX(first, link.first),
-                                           MIN(last, it->first));
-    if (!ancestors_open)
-      return false;
-  }
-
-  open = true;
-  return true;
-}
-
-// Release every open past parent: subtract its intervals from the counter,
-// unpin its inode, unlink this realm from its open_past_children, then
-// empty the map.
-void SnapRealm::close_parents()
-{
-  for (auto& entry : open_past_parents) {
-    SnapRealm *past = entry.second.first;
-    num_open_past_parents -= entry.second.second.size();
-    past->inode->put(CInode::PIN_PASTSNAPPARENT);
-    past->open_past_children.erase(this);
-  }
-  open_past_parents.clear();
-}
-
-
-/*
- * Collect into `s` the snapids in [first,last] that apply to this realm:
- * our own snaps, plus the snaps of each past parent and of the current
- * parent, restricted to the interval during which it was our parent.
- * max_seq / max_last_created / max_last_destroyed are raised to the largest
- * values seen in any realm visited.
- */
-void SnapRealm::build_snap_set(set<snapid_t> &s,
-                               snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed,
-                               snapid_t first, snapid_t last) const
-{
-  dout(10) << "build_snap_set [" << first << "," << last << "] on " << *this << dendl;
-
-  if (max_seq < srnode.seq)
-    max_seq = srnode.seq;
-  if (max_last_created < srnode.last_created)
-    max_last_created = srnode.last_created;
-  if (max_last_destroyed < srnode.last_destroyed)
-    max_last_destroyed = srnode.last_destroyed;
-
-  // our own snaps inside [first,last]; lower_bound gives the first key >= first
-  auto mine = srnode.snaps.lower_bound(first);
-  while (mine != srnode.snaps.end() && mine->first <= last) {
-    s.insert(mine->first);
-    ++mine;
-  }
-
-  // past parents whose interval intersects [first,last]
-  auto pp = srnode.past_parents.lower_bound(first);
-  while (pp != srnode.past_parents.end() &&
-         pp->first >= first &&
-         pp->second.first <= last) {
-    const CInode *oldparent = mdcache->get_inode(pp->second.ino);
-    assert(oldparent);  // call open_parents first!
-    assert(oldparent->snaprealm);
-    oldparent->snaprealm->build_snap_set(s, max_seq, max_last_created, max_last_destroyed,
-                                         MAX(first, pp->second.first),
-                                         MIN(last, pp->first));
-    ++pp;
-  }
-
-  // current parent, from current_parent_since onward
-  if (parent && srnode.current_parent_since <= last) {
-    parent->build_snap_set(s, max_seq, max_last_created, max_last_destroyed,
-                           MAX(first, srnode.current_parent_since), last);
-  }
-}
-
-
-// Rebuild the cached snap set, snap context and snap trace if srnode.seq has
-// moved past cached_seq.  Requires the realm's parents to be open.
-void SnapRealm::check_cache() const
-{
-  assert(open);
-  if (srnode.seq <= cached_seq)
-    return;
-
-  // throw away everything derived from the old state
-  cached_snaps.clear();
-  cached_snap_context.clear();
-  cached_snap_trace.clear();
-
-  // seed from our own srnode; build_snap_set raises these to the maxima
-  // over every realm it visits
-  cached_seq = srnode.seq;
-  cached_last_created = srnode.last_created;
-  cached_last_destroyed = srnode.last_destroyed;
-  build_snap_set(cached_snaps, cached_seq, cached_last_created, cached_last_destroyed,
-                 0, CEPH_NOSNAP);
-
-  build_snap_trace(cached_snap_trace);
-
-  dout(10) << "check_cache rebuilt " << cached_snaps
-           << " seq " << srnode.seq
-           << " cached_seq " << cached_seq
-           << " cached_last_created " << cached_last_created
-           << " cached_last_destroyed " << cached_last_destroyed
-           << ")" << dendl;
-}
-
-// Return the cached set of snapids for this realm, refreshing the cache
-// first if it is stale.
-const set<snapid_t>& SnapRealm::get_snaps() const
-{
-  check_cache();
-  const set<snapid_t>& snaps = cached_snaps;
-  dout(10) << "get_snaps " << snaps
-           << " (seq " << srnode.seq << " cached_seq " << cached_seq << ")"
-           << dendl;
-  return snaps;
-}
-
-/*
- * Return the cached SnapContext, filling it in on first use after a cache
- * rebuild.  Its snaps vector holds cached_snaps in descending order.
- */
-const SnapContext& SnapRealm::get_snap_context() const
-{
-  check_cache();
-
-  // a non-zero seq means the context was already filled in
-  if (cached_snap_context.seq)
-    return cached_snap_context;
-
-  cached_snap_context.seq = cached_seq;
-  // walking the ordered set backwards yields the reverse-sorted vector
-  cached_snap_context.snaps.assign(cached_snaps.rbegin(), cached_snaps.rend());
-
-  return cached_snap_context;
-}
-
-// Fill `infomap` with pointers to the SnapInfo of every snap in [first,last]
-// that applies to this realm: our own, then those of past parents and of the
-// current parent for the intervals during which they were our parent.
-void SnapRealm::get_snap_info(map<snapid_t,SnapInfo*>& infomap, snapid_t first, snapid_t last)
-{
-  // get_snaps() also refreshes the cache; the set itself is only logged here
-  const set<snapid_t>& snaps = get_snaps();
-  dout(10) << "get_snap_info snaps " << snaps << dendl;
-
-  // our own snaps inside [first,last]; lower_bound gives the first key >= first
-  auto mine = srnode.snaps.lower_bound(first);
-  while (mine != srnode.snaps.end() && mine->first <= last) {
-    infomap[mine->first] = &mine->second;
-    ++mine;
-  }
-
-  // past parents whose interval intersects [first,last]
-  auto pp = srnode.past_parents.lower_bound(first);
-  while (pp != srnode.past_parents.end() &&
-         pp->first >= first &&
-         pp->second.first <= last) {
-    CInode *oldparent = mdcache->get_inode(pp->second.ino);
-    assert(oldparent);  // call open_parents first!
-    assert(oldparent->snaprealm);
-    oldparent->snaprealm->get_snap_info(infomap,
-                                        MAX(first, pp->second.first),
-                                        MIN(last, pp->first));
-    ++pp;
-  }
-
-  // current parent, from current_parent_since onward
-  if (parent && srnode.current_parent_since <= last)
-    parent->get_snap_info(infomap, MAX(first, srnode.current_parent_since), last);
-}
-
-// Look up the name of snapshot `snapid`.  If the snap belongs to this realm,
-// return its plain name when `atino` is this realm's inode and its long name
-// otherwise.  If not, defer to the past parent whose interval covers the
-// snapid, and finally to the current parent.
-const string& SnapRealm::get_snapname(snapid_t snapid, inodeno_t atino)
-{
-  auto own = srnode.snaps.find(snapid);
-  if (own != srnode.snaps.end()) {
-    SnapInfo& info = own->second;
-    if (atino == inode->ino())
-      return info.name;
-    return info.get_long_name();
-  }
-
-  // past_parents is keyed by the interval's last snapid
-  auto pp = srnode.past_parents.lower_bound(snapid);
-  if (pp != srnode.past_parents.end() && pp->second.first <= snapid) {
-    CInode *oldparent = mdcache->get_inode(pp->second.ino);
-    assert(oldparent);  // call open_parents first!
-    assert(oldparent->snaprealm);
-    return oldparent->snaprealm->get_snapname(snapid, atino);
-  }
-
-  // otherwise it has to come from the current parent
-  assert(srnode.current_parent_since <= snapid);
-  assert(parent);
-  return parent->get_snapname(snapid, atino);
-}
-
-// Resolve snapshot name `n`, as seen from directory `atino`, to a snapid
-// within [first,last].  Returns 0 when nothing matches.
-//
-// When `atino` is this realm's inode the name is compared directly with each
-// snap's name.  Otherwise `n` must have the form "_<name>_<ino>"; it is split
-// at the first '_' after position 0 and matched against a snap's name and
-// ino.  Past parents and the current parent are searched for the intervals
-// during which they were our parent.
-snapid_t SnapRealm::resolve_snapname(const string& n, inodeno_t atino, snapid_t first, snapid_t last)
-{
-  // first try me
-  dout(10) << "resolve_snapname '" << n << "' in [" << first << "," << last << "]" << dendl;
-
-  const bool actual = (atino == inode->ino());
-  string pname;
-  inodeno_t pino;
-  if (!actual) {
-    if (n.empty() || n[0] != '_')
-      return 0;
-    // compare against npos rather than narrowing the result of find() to int
-    const string::size_type sep = n.find('_', 1);
-    if (sep == string::npos)
-      return 0;
-    pname = n.substr(1, sep - 1);
-    pino = atoll(n.c_str() + sep + 1);
-    dout(10) << " " << n << " parses to name '" << pname << "' dirino " << pino << dendl;
-  }
-
-  // our own snaps inside [first,last]; lower_bound gives the first key >= first
-  for (auto p = srnode.snaps.lower_bound(first);
-       p != srnode.snaps.end() && p->first <= last;
-       ++p) {
-    dout(15) << " ? " << p->second << dendl;
-    if (actual && p->second.name == n)
-      return p->first;
-    if (!actual && p->second.name == pname && p->second.ino == pino)
-      return p->first;
-  }
-
-  // include snaps for parents during intervals that intersect [first,last]
-  for (auto p = srnode.past_parents.lower_bound(first);
-       p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
-       ++p) {
-    CInode *oldparent = mdcache->get_inode(p->second.ino);
-    assert(oldparent);  // call open_parents first!
-    assert(oldparent->snaprealm);
-    snapid_t r = oldparent->snaprealm->resolve_snapname(n, atino,
-                                                        MAX(first, p->second.first),
-                                                        MIN(last, p->first));
-    if (r)
-      return r;
-  }
-  if (parent && srnode.current_parent_since <= last)
-    return parent->resolve_snapname(n, atino, MAX(first, srnode.current_parent_since), last);
-  return 0;
-}
-
-
-// Re-derive the parent realm from the inode's parent dentry: find_snaprealm()
-// on the inode of the containing dir.  If it changed, move this realm between
-// the old and new parents' open_children sets and invalidate cached snaps.
-void SnapRealm::adjust_parent()
-{
-  CInode *diri = inode->get_parent_dn()->get_dir()->get_inode();
-  SnapRealm *newparent = diri->find_snaprealm();
-  if (newparent == parent)
-    return;
-
-  dout(10) << "adjust_parent " << parent << " -> " << newparent << dendl;
-  if (parent)
-    parent->open_children.erase(this);
-  parent = newparent;
-  if (parent)
-    parent->open_children.insert(this);
-
-  invalidate_cached_snaps();
-}
-
-// Hand over to the new realm `child` whatever in this realm falls beneath
-// child's inode: open child realms and inodes with caps.
-void SnapRealm::split_at(SnapRealm *child)
-{
-  dout(10) << "split_at " << *child
-           << " on " << *child->inode << dendl;
-
-  // this realm's inode is an mdsdir, or the child's inode is not a directory
-  if (inode->is_mdsdir() || !child->inode->is_dir()) {
-    if (!child->inode->containing_realm) {
-      // no caps, nothing to move/split.
-      dout(20) << " split no-op, no caps to move on file " << *child->inode << dendl;
-      assert(!child->inode->is_any_caps());
-    } else {
-      // - no open children.
-      // - only need to move this child's inode's caps.
-      child->inode->move_to_realm(child);
-    }
-    return;
-  }
-
-  // directory case
-
-  // 1) open child realms whose inode lies beneath child's inode
-  dout(10) << " open_children are " << open_children << dendl;
-  auto oc = open_children.begin();
-  while (oc != open_children.end()) {
-    SnapRealm *realm = *oc;
-    const bool moves = realm != child &&
-                       child->inode->is_projected_ancestor_of(realm->inode);
-    if (!moves) {
-      dout(20) << " keeping child realm " << *realm << " on " << *realm->inode << dendl;
-      ++oc;
-      continue;
-    }
-    dout(20) << " child gets child realm " << *realm << " on " << *realm->inode << dendl;
-    realm->parent = child;
-    child->open_children.insert(realm);
-    oc = open_children.erase(oc);
-  }
-
-  // 2) inodes with caps
-  auto cap_it = inodes_with_caps.begin(member_offset(CInode, item_caps));
-  while (!cap_it.end()) {
-    CInode *in = *cap_it;
-    // step past `in` before it is possibly handed to move_to_realm()
-    ++cap_it;
-
-    // is `in` the child's inode, or does walking up its parent dentries
-    // reach the child's inode?
-    bool under_child = (in == child->inode);
-    if (!under_child) {
-      for (CInode *t = in; t->get_parent_dn(); ) {
-        t = t->get_parent_dn()->get_dir()->get_inode();
-        if (t == child->inode) {
-          under_child = true;
-          break;
-        }
-        if (t == in)  // walked back to the start; stop
-          break;
-      }
-    }
-
-    if (under_child) {
-      dout(20) << " child gets " << *in << dendl;
-      in->move_to_realm(child);
-    } else {
-      dout(20) << " keeping " << *in << dendl;
-    }
-  }
-}
-
-// Return the cached encoded snap trace, refreshing the cache first if it is
-// stale.
-const bufferlist& SnapRealm::get_snap_trace()
-{
-  check_cache();
-  const bufferlist& trace = cached_snap_trace;
-  return trace;
-}
-
-// Append to `snapbl` an encoded SnapRealmInfo for this realm, then recurse
-// into the parent realm so the buffer ends up holding the chain of realms
-// from this one upward.
-void SnapRealm::build_snap_trace(bufferlist& snapbl) const
-{
-  SnapRealmInfo info(inode->ino(), srnode.created, srnode.seq, srnode.current_parent_since);
-
-  if (!parent) {
-    info.h.parent = 0;
-  } else {
-    info.h.parent = parent->inode->ino();
-    if (!srnode.past_parents.empty()) {
-      // snaps up to the end of the most recent past-parent interval,
-      // listed newest first
-      snapid_t last = srnode.past_parents.rbegin()->first;
-      set<snapid_t> past;
-      snapid_t max_seq, max_last_created, max_last_destroyed;
-      build_snap_set(past, max_seq, max_last_created, max_last_destroyed, 0, last);
-      info.prior_parent_snaps.reserve(past.size());
-      info.prior_parent_snaps.insert(info.prior_parent_snaps.end(),
-                                     past.rbegin(), past.rend());
-      dout(10) << "build_snap_trace prior_parent_snaps from [1," << last << "] "
-               << info.prior_parent_snaps << dendl;
-    }
-  }
-
-  // our own snaps, newest first
-  info.my_snaps.reserve(srnode.snaps.size());
-  auto mine = srnode.snaps.rbegin();
-  while (mine != srnode.snaps.rend()) {
-    info.my_snaps.push_back(mine->first);
-    ++mine;
-  }
-  dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl;
-
-  ::encode(info, snapbl);
-
-  if (parent)
-    parent->build_snap_trace(snapbl);
-}
-
-
-
-// Drop every past_parents entry whose interval [first,last] contains none of
-// the realm's cached snaps, unregistering it from the open past parents too.
-void SnapRealm::prune_past_parents()
-{
-  dout(10) << "prune_past_parents" << dendl;
-  check_cache();
-  assert(open);
-
-  auto pp = srnode.past_parents.begin();
-  while (pp != srnode.past_parents.end()) {
-    // smallest cached snap >= interval start; the entry is still needed
-    // only if that snap is also <= the interval end (the map key)
-    auto snap = cached_snaps.lower_bound(pp->second.first);
-    const bool in_use = snap != cached_snaps.end() && *snap <= pp->first;
-    if (in_use) {
-      dout(10) << "prune_past_parents keeping [" << pp->second.first << "," << pp->first
-               << "] " << pp->second.ino << dendl;
-      ++pp;
-      continue;
-    }
-
-    dout(10) << "prune_past_parents pruning [" << pp->second.first << "," << pp->first
-             << "] " << pp->second.ino << dendl;
-    remove_open_past_parent(pp->second.ino, pp->first);
-    pp = srnode.past_parents.erase(pp);
-  }
-}
-