X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Fclient%2FClient.cc;fp=src%2Fceph%2Fsrc%2Fclient%2FClient.cc;h=0000000000000000000000000000000000000000;hb=7da45d65be36d36b880cc55c5036e96c24b53f00;hp=1d9277a61b6ecfb1d5c01f83d2aedc4418600e3f;hpb=691462d09d0987b47e112d6ee8740375df3c51b2;p=stor4nfv.git diff --git a/src/ceph/src/client/Client.cc b/src/ceph/src/client/Client.cc deleted file mode 100644 index 1d9277a..0000000 --- a/src/ceph/src/client/Client.cc +++ /dev/null @@ -1,13842 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2004-2006 Sage Weil - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - - -// unix-ey fs stuff -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#if defined(__FreeBSD__) -#define XATTR_CREATE 0x1 -#define XATTR_REPLACE 0x2 -#else -#include -#endif - -#if defined(__linux__) -#include -#endif - -#include - -#include "common/config.h" -#include "common/version.h" - -// ceph stuff -#include "messages/MClientSession.h" -#include "messages/MClientReconnect.h" -#include "messages/MClientRequest.h" -#include "messages/MClientRequestForward.h" -#include "messages/MClientReply.h" -#include "messages/MClientCaps.h" -#include "messages/MClientLease.h" -#include "messages/MClientSnap.h" -#include "messages/MCommandReply.h" -#include "messages/MOSDMap.h" -#include "messages/MClientQuota.h" -#include "messages/MClientCapRelease.h" -#include "messages/MMDSMap.h" -#include "messages/MFSMap.h" -#include "messages/MFSMapUser.h" - -#include "mon/MonClient.h" - -#include "mds/flock.h" -#include "osd/OSDMap.h" -#include "osdc/Filer.h" - -#include "common/Cond.h" -#include "common/Mutex.h" -#include "common/perf_counters.h" -#include "common/admin_socket.h" -#include "common/errno.h" -#include "include/str_list.h" - -#define dout_subsys ceph_subsys_client - -#include "include/lru.h" -#include "include/compat.h" -#include "include/stringify.h" - -#include "Client.h" -#include "Inode.h" -#include "Dentry.h" -#include "Dir.h" -#include "ClientSnapRealm.h" -#include "Fh.h" -#include "MetaSession.h" -#include "MetaRequest.h" -#include "ObjecterWriteback.h" -#include "posix_acl.h" - -#include "include/assert.h" -#include "include/stat.h" - -#include "include/cephfs/ceph_statx.h" - -#if HAVE_GETGROUPLIST -#include -#include -#include -#endif - -#undef dout_prefix -#define dout_prefix *_dout << "client." << whoami << " " - -#define tout(cct) if (!cct->_conf->client_trace.empty()) traceout - -// FreeBSD fails to define this -#ifndef O_DSYNC -#define O_DSYNC 0x0 -#endif -// Darwin fails to define this -#ifndef O_RSYNC -#define O_RSYNC 0x0 -#endif - -#ifndef O_DIRECT -#define O_DIRECT 0x0 -#endif - -#define DEBUG_GETATTR_CAPS (CEPH_CAP_XATTR_SHARED) - -void client_flush_set_callback(void *p, ObjectCacher::ObjectSet *oset) -{ - Client *client = static_cast(p); - client->flush_set_callback(oset); -} - - -// ------------- - -Client::CommandHook::CommandHook(Client *client) : - m_client(client) -{ -} - -bool Client::CommandHook::call(std::string command, cmdmap_t& cmdmap, - std::string format, bufferlist& out) -{ - Formatter *f = Formatter::create(format); - f->open_object_section("result"); - m_client->client_lock.Lock(); - if (command == "mds_requests") - m_client->dump_mds_requests(f); - else if (command == "mds_sessions") - m_client->dump_mds_sessions(f); - else if (command == "dump_cache") - m_client->dump_cache(f); - else if (command == "kick_stale_sessions") - m_client->_kick_stale_sessions(); - else if (command == "status") - m_client->dump_status(f); - else - assert(0 == "bad command registered"); - m_client->client_lock.Unlock(); - f->close_section(); - f->flush(out); - delete f; - return true; -} - - -// ------------- - -dir_result_t::dir_result_t(Inode *in, const UserPerm& perms) - : inode(in), offset(0), next_offset(2), - release_count(0), ordered_count(0), cache_index(0), start_shared_gen(0), - perms(perms) - { } - -void Client::_reset_faked_inos() -{ - ino_t start = 1024; - free_faked_inos.clear(); - free_faked_inos.insert(start, (uint32_t)-1 - start + 1); - last_used_faked_ino = 0; - _use_faked_inos = sizeof(ino_t) < 8 || cct->_conf->client_use_faked_inos; -} - -void Client::_assign_faked_ino(Inode *in) -{ - interval_set::const_iterator it = free_faked_inos.lower_bound(last_used_faked_ino + 1); - if (it == free_faked_inos.end() && last_used_faked_ino > 0) { - last_used_faked_ino = 0; - it = free_faked_inos.lower_bound(last_used_faked_ino + 1); - } - assert(it != free_faked_inos.end()); - if (last_used_faked_ino < it.get_start()) { - assert(it.get_len() > 0); - last_used_faked_ino = it.get_start(); - } else { - ++last_used_faked_ino; - assert(it.get_start() + it.get_len() > last_used_faked_ino); - } - in->faked_ino = last_used_faked_ino; - free_faked_inos.erase(in->faked_ino); - faked_ino_map[in->faked_ino] = in->vino(); -} - -void Client::_release_faked_ino(Inode *in) -{ - free_faked_inos.insert(in->faked_ino); - faked_ino_map.erase(in->faked_ino); -} - -vinodeno_t Client::_map_faked_ino(ino_t ino) -{ - vinodeno_t vino; - if (ino == 1) - vino = root->vino(); - else if (faked_ino_map.count(ino)) - vino = faked_ino_map[ino]; - else - vino = vinodeno_t(0, CEPH_NOSNAP); - ldout(cct, 10) << "map_faked_ino " << ino << " -> " << vino << dendl; - return vino; -} - -vinodeno_t Client::map_faked_ino(ino_t ino) -{ - Mutex::Locker lock(client_lock); - return _map_faked_ino(ino); -} - -// cons/des - -Client::Client(Messenger *m, MonClient *mc, Objecter *objecter_) - : Dispatcher(m->cct), - m_command_hook(this), - timer(m->cct, client_lock), - callback_handle(NULL), - switch_interrupt_cb(NULL), - remount_cb(NULL), - ino_invalidate_cb(NULL), - dentry_invalidate_cb(NULL), - getgroups_cb(NULL), - umask_cb(NULL), - can_invalidate_dentries(false), - require_remount(false), - async_ino_invalidator(m->cct), - async_dentry_invalidator(m->cct), - interrupt_finisher(m->cct), - remount_finisher(m->cct), - objecter_finisher(m->cct), - tick_event(NULL), - messenger(m), monclient(mc), - objecter(objecter_), - whoami(mc->get_global_id()), cap_epoch_barrier(0), - last_tid(0), oldest_tid(0), last_flush_tid(1), - initialized(false), - mounted(false), unmounting(false), blacklisted(false), - local_osd(-1), local_osd_epoch(0), - unsafe_sync_write(0), - client_lock("Client::client_lock") -{ - _reset_faked_inos(); - // - root = 0; - - num_flushing_caps = 0; - - _dir_vxattrs_name_size = _vxattrs_calcu_name_size(_dir_vxattrs); - _file_vxattrs_name_size = _vxattrs_calcu_name_size(_file_vxattrs); - - user_id = cct->_conf->client_mount_uid; - group_id = cct->_conf->client_mount_gid; - - acl_type = NO_ACL; - if (cct->_conf->client_acl_type == "posix_acl") - acl_type = POSIX_ACL; - - lru.lru_set_midpoint(cct->_conf->client_cache_mid); - - // file handles - free_fd_set.insert(10, 1<<30); - - mdsmap.reset(new MDSMap); - - // osd interfaces - writeback_handler.reset(new ObjecterWriteback(objecter, &objecter_finisher, - &client_lock)); - objectcacher.reset(new ObjectCacher(cct, "libcephfs", *writeback_handler, client_lock, - client_flush_set_callback, // all commit callback - (void*)this, - cct->_conf->client_oc_size, - cct->_conf->client_oc_max_objects, - cct->_conf->client_oc_max_dirty, - cct->_conf->client_oc_target_dirty, - cct->_conf->client_oc_max_dirty_age, - true)); - objecter_finisher.start(); - filer.reset(new Filer(objecter, &objecter_finisher)); - objecter->enable_blacklist_events(); -} - - -Client::~Client() -{ - assert(!client_lock.is_locked()); - - // It is necessary to hold client_lock, because any inode destruction - // may call into ObjectCacher, which asserts that it's lock (which is - // client_lock) is held. - client_lock.Lock(); - tear_down_cache(); - client_lock.Unlock(); -} - -void Client::tear_down_cache() -{ - // fd's - for (ceph::unordered_map::iterator it = fd_map.begin(); - it != fd_map.end(); - ++it) { - Fh *fh = it->second; - ldout(cct, 1) << "tear_down_cache forcing close of fh " << it->first << " ino " << fh->inode->ino << dendl; - _release_fh(fh); - } - fd_map.clear(); - - while (!opened_dirs.empty()) { - dir_result_t *dirp = *opened_dirs.begin(); - ldout(cct, 1) << "tear_down_cache forcing close of dir " << dirp << " ino " << dirp->inode->ino << dendl; - _closedir(dirp); - } - - // caps! - // *** FIXME *** - - // empty lru - trim_cache(); - assert(lru.lru_get_size() == 0); - - // close root ino - assert(inode_map.size() <= 1 + root_parents.size()); - if (root && inode_map.size() == 1 + root_parents.size()) { - delete root; - root = 0; - root_ancestor = 0; - while (!root_parents.empty()) - root_parents.erase(root_parents.begin()); - inode_map.clear(); - _reset_faked_inos(); - } - - assert(inode_map.empty()); -} - -inodeno_t Client::get_root_ino() -{ - Mutex::Locker l(client_lock); - if (use_faked_inos()) - return root->faked_ino; - else - return root->ino; -} - -Inode *Client::get_root() -{ - Mutex::Locker l(client_lock); - root->ll_get(); - return root; -} - - -// debug crapola - -void Client::dump_inode(Formatter *f, Inode *in, set& did, bool disconnected) -{ - filepath path; - in->make_long_path(path); - ldout(cct, 1) << "dump_inode: " - << (disconnected ? "DISCONNECTED ":"") - << "inode " << in->ino - << " " << path - << " ref " << in->get_num_ref() - << *in << dendl; - - if (f) { - f->open_object_section("inode"); - f->dump_stream("path") << path; - if (disconnected) - f->dump_int("disconnected", 1); - in->dump(f); - f->close_section(); - } - - did.insert(in); - if (in->dir) { - ldout(cct, 1) << " dir " << in->dir << " size " << in->dir->dentries.size() << dendl; - for (ceph::unordered_map::iterator it = in->dir->dentries.begin(); - it != in->dir->dentries.end(); - ++it) { - ldout(cct, 1) << " " << in->ino << " dn " << it->first << " " << it->second << " ref " << it->second->ref << dendl; - if (f) { - f->open_object_section("dentry"); - it->second->dump(f); - f->close_section(); - } - if (it->second->inode) - dump_inode(f, it->second->inode.get(), did, false); - } - } -} - -void Client::dump_cache(Formatter *f) -{ - set did; - - ldout(cct, 1) << "dump_cache" << dendl; - - if (f) - f->open_array_section("cache"); - - if (root) - dump_inode(f, root, did, true); - - // make a second pass to catch anything disconnected - for (ceph::unordered_map::iterator it = inode_map.begin(); - it != inode_map.end(); - ++it) { - if (did.count(it->second)) - continue; - dump_inode(f, it->second, did, true); - } - - if (f) - f->close_section(); -} - -void Client::dump_status(Formatter *f) -{ - assert(client_lock.is_locked_by_me()); - - ldout(cct, 1) << __func__ << dendl; - - const epoch_t osd_epoch - = objecter->with_osdmap(std::mem_fn(&OSDMap::get_epoch)); - - if (f) { - f->open_object_section("metadata"); - for (const auto& kv : metadata) - f->dump_string(kv.first.c_str(), kv.second); - f->close_section(); - - f->dump_int("dentry_count", lru.lru_get_size()); - f->dump_int("dentry_pinned_count", lru.lru_get_num_pinned()); - f->dump_int("id", get_nodeid().v); - f->dump_int("inode_count", inode_map.size()); - f->dump_int("mds_epoch", mdsmap->get_epoch()); - f->dump_int("osd_epoch", osd_epoch); - f->dump_int("osd_epoch_barrier", cap_epoch_barrier); - } -} - -int Client::init() -{ - timer.init(); - objectcacher->start(); - - client_lock.Lock(); - assert(!initialized); - - messenger->add_dispatcher_tail(this); - client_lock.Unlock(); - - _finish_init(); - return 0; -} - -void Client::_finish_init() -{ - client_lock.Lock(); - // logger - PerfCountersBuilder plb(cct, "client", l_c_first, l_c_last); - plb.add_time_avg(l_c_reply, "reply", "Latency of receiving a reply on metadata request"); - plb.add_time_avg(l_c_lat, "lat", "Latency of processing a metadata request"); - plb.add_time_avg(l_c_wrlat, "wrlat", "Latency of a file data write operation"); - logger.reset(plb.create_perf_counters()); - cct->get_perfcounters_collection()->add(logger.get()); - - client_lock.Unlock(); - - cct->_conf->add_observer(this); - - AdminSocket* admin_socket = cct->get_admin_socket(); - int ret = admin_socket->register_command("mds_requests", - "mds_requests", - &m_command_hook, - "show in-progress mds requests"); - if (ret < 0) { - lderr(cct) << "error registering admin socket command: " - << cpp_strerror(-ret) << dendl; - } - ret = admin_socket->register_command("mds_sessions", - "mds_sessions", - &m_command_hook, - "show mds session state"); - if (ret < 0) { - lderr(cct) << "error registering admin socket command: " - << cpp_strerror(-ret) << dendl; - } - ret = admin_socket->register_command("dump_cache", - "dump_cache", - &m_command_hook, - "show in-memory metadata cache contents"); - if (ret < 0) { - lderr(cct) << "error registering admin socket command: " - << cpp_strerror(-ret) << dendl; - } - ret = admin_socket->register_command("kick_stale_sessions", - "kick_stale_sessions", - &m_command_hook, - "kick sessions that were remote reset"); - if (ret < 0) { - lderr(cct) << "error registering admin socket command: " - << cpp_strerror(-ret) << dendl; - } - ret = admin_socket->register_command("status", - "status", - &m_command_hook, - "show overall client status"); - if (ret < 0) { - lderr(cct) << "error registering admin socket command: " - << cpp_strerror(-ret) << dendl; - } - - client_lock.Lock(); - initialized = true; - client_lock.Unlock(); -} - -void Client::shutdown() -{ - ldout(cct, 1) << "shutdown" << dendl; - - // If we were not mounted, but were being used for sending - // MDS commands, we may have sessions that need closing. - client_lock.Lock(); - _close_sessions(); - client_lock.Unlock(); - - cct->_conf->remove_observer(this); - - AdminSocket* admin_socket = cct->get_admin_socket(); - admin_socket->unregister_command("mds_requests"); - admin_socket->unregister_command("mds_sessions"); - admin_socket->unregister_command("dump_cache"); - admin_socket->unregister_command("kick_stale_sessions"); - admin_socket->unregister_command("status"); - - if (ino_invalidate_cb) { - ldout(cct, 10) << "shutdown stopping cache invalidator finisher" << dendl; - async_ino_invalidator.wait_for_empty(); - async_ino_invalidator.stop(); - } - - if (dentry_invalidate_cb) { - ldout(cct, 10) << "shutdown stopping dentry invalidator finisher" << dendl; - async_dentry_invalidator.wait_for_empty(); - async_dentry_invalidator.stop(); - } - - if (switch_interrupt_cb) { - ldout(cct, 10) << "shutdown stopping interrupt finisher" << dendl; - interrupt_finisher.wait_for_empty(); - interrupt_finisher.stop(); - } - - if (remount_cb) { - ldout(cct, 10) << "shutdown stopping remount finisher" << dendl; - remount_finisher.wait_for_empty(); - remount_finisher.stop(); - } - - objectcacher->stop(); // outside of client_lock! this does a join. - - client_lock.Lock(); - assert(initialized); - initialized = false; - timer.shutdown(); - client_lock.Unlock(); - - objecter_finisher.wait_for_empty(); - objecter_finisher.stop(); - - if (logger) { - cct->get_perfcounters_collection()->remove(logger.get()); - logger.reset(); - } -} - - -// =================== -// metadata cache stuff - -void Client::trim_cache(bool trim_kernel_dcache) -{ - uint64_t max = cct->_conf->client_cache_size; - ldout(cct, 20) << "trim_cache size " << lru.lru_get_size() << " max " << max << dendl; - unsigned last = 0; - while (lru.lru_get_size() != last) { - last = lru.lru_get_size(); - - if (!unmounting && lru.lru_get_size() <= max) break; - - // trim! - Dentry *dn = static_cast(lru.lru_get_next_expire()); - if (!dn) - break; // done - - trim_dentry(dn); - } - - if (trim_kernel_dcache && lru.lru_get_size() > max) - _invalidate_kernel_dcache(); - - // hose root? - if (lru.lru_get_size() == 0 && root && root->get_num_ref() == 0 && inode_map.size() == 1 + root_parents.size()) { - ldout(cct, 15) << "trim_cache trimmed root " << root << dendl; - delete root; - root = 0; - root_ancestor = 0; - while (!root_parents.empty()) - root_parents.erase(root_parents.begin()); - inode_map.clear(); - _reset_faked_inos(); - } -} - -void Client::trim_cache_for_reconnect(MetaSession *s) -{ - mds_rank_t mds = s->mds_num; - ldout(cct, 20) << "trim_cache_for_reconnect mds." << mds << dendl; - - int trimmed = 0; - list skipped; - while (lru.lru_get_size() > 0) { - Dentry *dn = static_cast(lru.lru_expire()); - if (!dn) - break; - - if ((dn->inode && dn->inode->caps.count(mds)) || - dn->dir->parent_inode->caps.count(mds)) { - trim_dentry(dn); - trimmed++; - } else - skipped.push_back(dn); - } - - for(list::iterator p = skipped.begin(); p != skipped.end(); ++p) - lru.lru_insert_mid(*p); - - ldout(cct, 20) << "trim_cache_for_reconnect mds." << mds - << " trimmed " << trimmed << " dentries" << dendl; - - if (s->caps.size() > 0) - _invalidate_kernel_dcache(); -} - -void Client::trim_dentry(Dentry *dn) -{ - ldout(cct, 15) << "trim_dentry unlinking dn " << dn->name - << " in dir " << hex << dn->dir->parent_inode->ino - << dendl; - if (dn->inode) { - Inode *diri = dn->dir->parent_inode; - diri->dir_release_count++; - clear_dir_complete_and_ordered(diri, true); - } - unlink(dn, false, false); // drop dir, drop dentry -} - - -void Client::update_inode_file_bits(Inode *in, - uint64_t truncate_seq, uint64_t truncate_size, - uint64_t size, uint64_t change_attr, - uint64_t time_warp_seq, utime_t ctime, - utime_t mtime, - utime_t atime, - version_t inline_version, - bufferlist& inline_data, - int issued) -{ - bool warn = false; - ldout(cct, 10) << "update_inode_file_bits " << *in << " " << ccap_string(issued) - << " mtime " << mtime << dendl; - ldout(cct, 25) << "truncate_seq: mds " << truncate_seq << " local " - << in->truncate_seq << " time_warp_seq: mds " << time_warp_seq - << " local " << in->time_warp_seq << dendl; - uint64_t prior_size = in->size; - - if (inline_version > in->inline_version) { - in->inline_data = inline_data; - in->inline_version = inline_version; - } - - /* always take a newer change attr */ - if (change_attr > in->change_attr) - in->change_attr = change_attr; - - if (truncate_seq > in->truncate_seq || - (truncate_seq == in->truncate_seq && size > in->size)) { - ldout(cct, 10) << "size " << in->size << " -> " << size << dendl; - in->size = size; - in->reported_size = size; - if (truncate_seq != in->truncate_seq) { - ldout(cct, 10) << "truncate_seq " << in->truncate_seq << " -> " - << truncate_seq << dendl; - in->truncate_seq = truncate_seq; - in->oset.truncate_seq = truncate_seq; - - // truncate cached file data - if (prior_size > size) { - _invalidate_inode_cache(in, truncate_size, prior_size - truncate_size); - } - } - - // truncate inline data - if (in->inline_version < CEPH_INLINE_NONE) { - uint32_t len = in->inline_data.length(); - if (size < len) - in->inline_data.splice(size, len - size); - } - } - if (truncate_seq >= in->truncate_seq && - in->truncate_size != truncate_size) { - if (in->is_file()) { - ldout(cct, 10) << "truncate_size " << in->truncate_size << " -> " - << truncate_size << dendl; - in->truncate_size = truncate_size; - in->oset.truncate_size = truncate_size; - } else { - ldout(cct, 0) << "Hmmm, truncate_seq && truncate_size changed on non-file inode!" << dendl; - } - } - - // be careful with size, mtime, atime - if (issued & (CEPH_CAP_FILE_EXCL| - CEPH_CAP_FILE_WR| - CEPH_CAP_FILE_BUFFER| - CEPH_CAP_AUTH_EXCL| - CEPH_CAP_XATTR_EXCL)) { - ldout(cct, 30) << "Yay have enough caps to look at our times" << dendl; - if (ctime > in->ctime) - in->ctime = ctime; - if (time_warp_seq > in->time_warp_seq) { - ldout(cct, 10) << "mds time_warp_seq " << time_warp_seq << " on inode " << *in - << " is higher than local time_warp_seq " - << in->time_warp_seq << dendl; - //the mds updated times, so take those! - in->mtime = mtime; - in->atime = atime; - in->time_warp_seq = time_warp_seq; - } else if (time_warp_seq == in->time_warp_seq) { - //take max times - if (mtime > in->mtime) - in->mtime = mtime; - if (atime > in->atime) - in->atime = atime; - } else if (issued & CEPH_CAP_FILE_EXCL) { - //ignore mds values as we have a higher seq - } else warn = true; - } else { - ldout(cct, 30) << "Don't have enough caps, just taking mds' time values" << dendl; - if (time_warp_seq >= in->time_warp_seq) { - in->ctime = ctime; - in->mtime = mtime; - in->atime = atime; - in->time_warp_seq = time_warp_seq; - } else warn = true; - } - if (warn) { - ldout(cct, 0) << "WARNING: " << *in << " mds time_warp_seq " - << time_warp_seq << " is lower than local time_warp_seq " - << in->time_warp_seq - << dendl; - } -} - -void Client::_fragmap_remove_non_leaves(Inode *in) -{ - for (map::iterator p = in->fragmap.begin(); p != in->fragmap.end(); ) - if (!in->dirfragtree.is_leaf(p->first)) - in->fragmap.erase(p++); - else - ++p; -} - -void Client::_fragmap_remove_stopped_mds(Inode *in, mds_rank_t mds) -{ - for (auto p = in->fragmap.begin(); p != in->fragmap.end(); ) - if (p->second == mds) - in->fragmap.erase(p++); - else - ++p; -} - -Inode * Client::add_update_inode(InodeStat *st, utime_t from, - MetaSession *session, - const UserPerm& request_perms) -{ - Inode *in; - bool was_new = false; - if (inode_map.count(st->vino)) { - in = inode_map[st->vino]; - ldout(cct, 12) << "add_update_inode had " << *in << " caps " << ccap_string(st->cap.caps) << dendl; - } else { - in = new Inode(this, st->vino, &st->layout); - inode_map[st->vino] = in; - - if (use_faked_inos()) - _assign_faked_ino(in); - - if (!root) { - root = in; - root_ancestor = in; - cwd = root; - } else if (!mounted) { - root_parents[root_ancestor] = in; - root_ancestor = in; - } - - // immutable bits - in->ino = st->vino.ino; - in->snapid = st->vino.snapid; - in->mode = st->mode & S_IFMT; - was_new = true; - } - - in->rdev = st->rdev; - if (in->is_symlink()) - in->symlink = st->symlink; - - if (was_new) - ldout(cct, 12) << "add_update_inode adding " << *in << " caps " << ccap_string(st->cap.caps) << dendl; - - if (!st->cap.caps) - return in; // as with readdir returning indoes in different snaprealms (no caps!) - - // only update inode if mds info is strictly newer, or it is the same and projected (odd). - bool updating_inode = false; - int issued = 0; - if (st->version == 0 || - (in->version & ~1) < st->version) { - updating_inode = true; - - int implemented = 0; - issued = in->caps_issued(&implemented) | in->caps_dirty(); - issued |= implemented; - - in->version = st->version; - - if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { - in->mode = st->mode; - in->uid = st->uid; - in->gid = st->gid; - in->btime = st->btime; - } - - if ((issued & CEPH_CAP_LINK_EXCL) == 0) { - in->nlink = st->nlink; - } - - in->dirstat = st->dirstat; - in->rstat = st->rstat; - in->quota = st->quota; - in->layout = st->layout; - - if (in->is_dir()) { - in->dir_layout = st->dir_layout; - ldout(cct, 20) << " dir hash is " << (int)in->dir_layout.dl_dir_hash << dendl; - } - - update_inode_file_bits(in, st->truncate_seq, st->truncate_size, st->size, - st->change_attr, st->time_warp_seq, st->ctime, - st->mtime, st->atime, st->inline_version, - st->inline_data, issued); - } else if (st->inline_version > in->inline_version) { - in->inline_data = st->inline_data; - in->inline_version = st->inline_version; - } - - if ((in->xattr_version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) && - st->xattrbl.length() && - st->xattr_version > in->xattr_version) { - bufferlist::iterator p = st->xattrbl.begin(); - ::decode(in->xattrs, p); - in->xattr_version = st->xattr_version; - } - - // move me if/when version reflects fragtree changes. - if (in->dirfragtree != st->dirfragtree) { - in->dirfragtree = st->dirfragtree; - _fragmap_remove_non_leaves(in); - } - - if (in->snapid == CEPH_NOSNAP) { - add_update_cap(in, session, st->cap.cap_id, st->cap.caps, st->cap.seq, - st->cap.mseq, inodeno_t(st->cap.realm), st->cap.flags, - request_perms); - if (in->auth_cap && in->auth_cap->session == session) - in->max_size = st->max_size; - } else - in->snap_caps |= st->cap.caps; - - // setting I_COMPLETE needs to happen after adding the cap - if (updating_inode && - in->is_dir() && - (st->cap.caps & CEPH_CAP_FILE_SHARED) && - (issued & CEPH_CAP_FILE_EXCL) == 0 && - in->dirstat.nfiles == 0 && - in->dirstat.nsubdirs == 0) { - ldout(cct, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on empty dir " << *in << dendl; - in->flags |= I_COMPLETE | I_DIR_ORDERED; - if (in->dir) { - ldout(cct, 10) << " dir is open on empty dir " << in->ino << " with " - << in->dir->dentries.size() << " entries, marking all dentries null" << dendl; - in->dir->readdir_cache.clear(); - for (auto p = in->dir->dentries.begin(); - p != in->dir->dentries.end(); - ++p) { - unlink(p->second, true, true); // keep dir, keep dentry - } - if (in->dir->dentries.empty()) - close_dir(in->dir); - } - } - - return in; -} - - -/* - * insert_dentry_inode - insert + link a single dentry + inode into the metadata cache. - */ -Dentry *Client::insert_dentry_inode(Dir *dir, const string& dname, LeaseStat *dlease, - Inode *in, utime_t from, MetaSession *session, - Dentry *old_dentry) -{ - Dentry *dn = NULL; - if (dir->dentries.count(dname)) - dn = dir->dentries[dname]; - - ldout(cct, 12) << "insert_dentry_inode '" << dname << "' vino " << in->vino() - << " in dir " << dir->parent_inode->vino() << " dn " << dn - << dendl; - - if (dn && dn->inode) { - if (dn->inode->vino() == in->vino()) { - touch_dn(dn); - ldout(cct, 12) << " had dentry " << dname - << " with correct vino " << dn->inode->vino() - << dendl; - } else { - ldout(cct, 12) << " had dentry " << dname - << " with WRONG vino " << dn->inode->vino() - << dendl; - unlink(dn, true, true); // keep dir, keep dentry - } - } - - if (!dn || !dn->inode) { - InodeRef tmp_ref(in); - if (old_dentry) { - if (old_dentry->dir != dir) { - Inode *old_diri = old_dentry->dir->parent_inode; - old_diri->dir_ordered_count++; - clear_dir_complete_and_ordered(old_diri, false); - } - unlink(old_dentry, dir == old_dentry->dir, false); // drop dentry, keep dir open if its the same dir - } - Inode *diri = dir->parent_inode; - diri->dir_ordered_count++; - clear_dir_complete_and_ordered(diri, false); - dn = link(dir, dname, in, dn); - } - - update_dentry_lease(dn, dlease, from, session); - return dn; -} - -void Client::update_dentry_lease(Dentry *dn, LeaseStat *dlease, utime_t from, MetaSession *session) -{ - utime_t dttl = from; - dttl += (float)dlease->duration_ms / 1000.0; - - assert(dn); - - if (dlease->mask & CEPH_LOCK_DN) { - if (dttl > dn->lease_ttl) { - ldout(cct, 10) << "got dentry lease on " << dn->name - << " dur " << dlease->duration_ms << "ms ttl " << dttl << dendl; - dn->lease_ttl = dttl; - dn->lease_mds = session->mds_num; - dn->lease_seq = dlease->seq; - dn->lease_gen = session->cap_gen; - } - } - dn->cap_shared_gen = dn->dir->parent_inode->shared_gen; -} - - -/* - * update MDS location cache for a single inode - */ -void Client::update_dir_dist(Inode *in, DirStat *dst) -{ - // auth - ldout(cct, 20) << "got dirfrag map for " << in->ino << " frag " << dst->frag << " to mds " << dst->auth << dendl; - if (dst->auth >= 0) { - in->fragmap[dst->frag] = dst->auth; - } else { - in->fragmap.erase(dst->frag); - } - if (!in->dirfragtree.is_leaf(dst->frag)) { - in->dirfragtree.force_to_leaf(cct, dst->frag); - _fragmap_remove_non_leaves(in); - } - - // replicated - in->dir_replicated = !dst->dist.empty(); // FIXME that's just one frag! - - // dist - /* - if (!st->dirfrag_dist.empty()) { // FIXME - set dist = st->dirfrag_dist.begin()->second; - if (dist.empty() && !in->dir_contacts.empty()) - ldout(cct, 9) << "lost dist spec for " << in->ino - << " " << dist << dendl; - if (!dist.empty() && in->dir_contacts.empty()) - ldout(cct, 9) << "got dist spec for " << in->ino - << " " << dist << dendl; - in->dir_contacts = dist; - } - */ -} - -void Client::clear_dir_complete_and_ordered(Inode *diri, bool complete) -{ - if (diri->flags & I_COMPLETE) { - if (complete) { - ldout(cct, 10) << " clearing (I_COMPLETE|I_DIR_ORDERED) on " << *diri << dendl; - diri->flags &= ~(I_COMPLETE | I_DIR_ORDERED); - } else { - if (diri->flags & I_DIR_ORDERED) { - ldout(cct, 10) << " clearing I_DIR_ORDERED on " << *diri << dendl; - diri->flags &= ~I_DIR_ORDERED; - } - } - if (diri->dir) - diri->dir->readdir_cache.clear(); - } -} - -/* - * insert results from readdir or lssnap into the metadata cache. - */ -void Client::insert_readdir_results(MetaRequest *request, MetaSession *session, Inode *diri) { - - MClientReply *reply = request->reply; - ConnectionRef con = request->reply->get_connection(); - uint64_t features = con->get_features(); - - dir_result_t *dirp = request->dirp; - assert(dirp); - - // the extra buffer list is only set for readdir and lssnap replies - bufferlist::iterator p = reply->get_extra_bl().begin(); - if (!p.end()) { - // snapdir? - if (request->head.op == CEPH_MDS_OP_LSSNAP) { - assert(diri); - diri = open_snapdir(diri); - } - - // only open dir if we're actually adding stuff to it! - Dir *dir = diri->open_dir(); - assert(dir); - - // dirstat - DirStat dst(p); - __u32 numdn; - __u16 flags; - ::decode(numdn, p); - ::decode(flags, p); - - bool end = ((unsigned)flags & CEPH_READDIR_FRAG_END); - bool hash_order = ((unsigned)flags & CEPH_READDIR_HASH_ORDER); - - frag_t fg = (unsigned)request->head.args.readdir.frag; - unsigned readdir_offset = dirp->next_offset; - string readdir_start = dirp->last_name; - assert(!readdir_start.empty() || readdir_offset == 2); - - unsigned last_hash = 0; - if (hash_order) { - if (!readdir_start.empty()) { - last_hash = ceph_frag_value(diri->hash_dentry_name(readdir_start)); - } else if (flags & CEPH_READDIR_OFFSET_HASH) { - /* mds understands offset_hash */ - last_hash = (unsigned)request->head.args.readdir.offset_hash; - } - } - - if (fg != dst.frag) { - ldout(cct, 10) << "insert_trace got new frag " << fg << " -> " << dst.frag << dendl; - fg = dst.frag; - if (!hash_order) { - readdir_offset = 2; - readdir_start.clear(); - dirp->offset = dir_result_t::make_fpos(fg, readdir_offset, false); - } - } - - ldout(cct, 10) << __func__ << " " << numdn << " readdir items, end=" << end - << ", hash_order=" << hash_order - << ", readdir_start " << readdir_start - << ", last_hash " << last_hash - << ", next_offset " << readdir_offset << dendl; - - if (diri->snapid != CEPH_SNAPDIR && - fg.is_leftmost() && readdir_offset == 2 && - !(hash_order && last_hash)) { - dirp->release_count = diri->dir_release_count; - dirp->ordered_count = diri->dir_ordered_count; - dirp->start_shared_gen = diri->shared_gen; - dirp->cache_index = 0; - } - - dirp->buffer_frag = fg; - - _readdir_drop_dirp_buffer(dirp); - dirp->buffer.reserve(numdn); - - string dname; - LeaseStat dlease; - for (unsigned i=0; isent_stamp, session, - request->perms); - Dentry *dn; - if (diri->dir->dentries.count(dname)) { - Dentry *olddn = diri->dir->dentries[dname]; - if (olddn->inode != in) { - // replace incorrect dentry - unlink(olddn, true, true); // keep dir, dentry - dn = link(dir, dname, in, olddn); - assert(dn == olddn); - } else { - // keep existing dn - dn = olddn; - touch_dn(dn); - } - } else { - // new dn - dn = link(dir, dname, in, NULL); - } - - update_dentry_lease(dn, &dlease, request->sent_stamp, session); - if (hash_order) { - unsigned hash = ceph_frag_value(diri->hash_dentry_name(dname)); - if (hash != last_hash) - readdir_offset = 2; - last_hash = hash; - dn->offset = dir_result_t::make_fpos(hash, readdir_offset++, true); - } else { - dn->offset = dir_result_t::make_fpos(fg, readdir_offset++, false); - } - // add to readdir cache - if (dirp->release_count == diri->dir_release_count && - dirp->ordered_count == diri->dir_ordered_count && - dirp->start_shared_gen == diri->shared_gen) { - if (dirp->cache_index == dir->readdir_cache.size()) { - if (i == 0) { - assert(!dirp->inode->is_complete_and_ordered()); - dir->readdir_cache.reserve(dirp->cache_index + numdn); - } - dir->readdir_cache.push_back(dn); - } else if (dirp->cache_index < dir->readdir_cache.size()) { - if (dirp->inode->is_complete_and_ordered()) - assert(dir->readdir_cache[dirp->cache_index] == dn); - else - dir->readdir_cache[dirp->cache_index] = dn; - } else { - assert(0 == "unexpected readdir buffer idx"); - } - dirp->cache_index++; - } - // add to cached result list - dirp->buffer.push_back(dir_result_t::dentry(dn->offset, dname, in)); - ldout(cct, 15) << __func__ << " " << hex << dn->offset << dec << ": '" << dname << "' -> " << in->ino << dendl; - } - - if (numdn > 0) - dirp->last_name = dname; - if (end) - dirp->next_offset = 2; - else - dirp->next_offset = readdir_offset; - - if (dir->is_empty()) - close_dir(dir); - } -} - -/** insert_trace - * - * insert a trace from a MDS reply into the cache. - */ -Inode* Client::insert_trace(MetaRequest *request, MetaSession *session) -{ - MClientReply *reply = request->reply; - int op = request->get_op(); - - ldout(cct, 10) << "insert_trace from " << request->sent_stamp << " mds." << session->mds_num - << " is_target=" << (int)reply->head.is_target - << " is_dentry=" << (int)reply->head.is_dentry - << dendl; - - bufferlist::iterator p = reply->get_trace_bl().begin(); - if (request->got_unsafe) { - ldout(cct, 10) << "insert_trace -- already got unsafe; ignoring" << dendl; - assert(p.end()); - return NULL; - } - - if (p.end()) { - ldout(cct, 10) << "insert_trace -- no trace" << dendl; - - Dentry *d = request->dentry(); - if (d) { - Inode *diri = d->dir->parent_inode; - diri->dir_release_count++; - clear_dir_complete_and_ordered(diri, true); - } - - if (d && reply->get_result() == 0) { - if (op == CEPH_MDS_OP_RENAME) { - // rename - Dentry *od = request->old_dentry(); - ldout(cct, 10) << " unlinking rename src dn " << od << " for traceless reply" << dendl; - assert(od); - unlink(od, true, true); // keep dir, dentry - } else if (op == CEPH_MDS_OP_RMDIR || - op == CEPH_MDS_OP_UNLINK) { - // unlink, rmdir - ldout(cct, 10) << " unlinking unlink/rmdir dn " << d << " for traceless reply" << dendl; - unlink(d, true, true); // keep dir, dentry - } - } - return NULL; - } - - ConnectionRef con = request->reply->get_connection(); - uint64_t features = con->get_features(); - ldout(cct, 10) << " features 0x" << hex << features << dec << dendl; - - // snap trace - SnapRealm *realm = NULL; - if (reply->snapbl.length()) - update_snap_trace(reply->snapbl, &realm); - - ldout(cct, 10) << " hrm " - << " is_target=" << (int)reply->head.is_target - << " is_dentry=" << (int)reply->head.is_dentry - << dendl; - - InodeStat dirst; - DirStat dst; - string dname; - LeaseStat dlease; - InodeStat ist; - - if (reply->head.is_dentry) { - dirst.decode(p, features); - dst.decode(p); - ::decode(dname, p); - ::decode(dlease, p); - } - - Inode *in = 0; - if (reply->head.is_target) { - ist.decode(p, features); - if (cct->_conf->client_debug_getattr_caps) { - unsigned wanted = 0; - if (op == CEPH_MDS_OP_GETATTR || op == CEPH_MDS_OP_LOOKUP) - wanted = request->head.args.getattr.mask; - else if (op == CEPH_MDS_OP_OPEN || op == CEPH_MDS_OP_CREATE) - wanted = request->head.args.open.mask; - - if ((wanted & CEPH_CAP_XATTR_SHARED) && - !(ist.xattr_version > 0 && ist.xattrbl.length() > 0)) - assert(0 == "MDS reply does not contain xattrs"); - } - - in = add_update_inode(&ist, request->sent_stamp, session, - request->perms); - } - - Inode *diri = NULL; - if (reply->head.is_dentry) { - diri = add_update_inode(&dirst, request->sent_stamp, session, - request->perms); - update_dir_dist(diri, &dst); // dir stat info is attached to .. - - if (in) { - Dir *dir = diri->open_dir(); - insert_dentry_inode(dir, dname, &dlease, in, request->sent_stamp, session, - (op == CEPH_MDS_OP_RENAME) ? request->old_dentry() : NULL); - } else { - Dentry *dn = NULL; - if (diri->dir && diri->dir->dentries.count(dname)) { - dn = diri->dir->dentries[dname]; - if (dn->inode) { - diri->dir_ordered_count++; - clear_dir_complete_and_ordered(diri, false); - unlink(dn, true, true); // keep dir, dentry - } - } - if (dlease.duration_ms > 0) { - if (!dn) { - Dir *dir = diri->open_dir(); - dn = link(dir, dname, NULL, NULL); - } - update_dentry_lease(dn, &dlease, request->sent_stamp, session); - } - } - } else if (op == CEPH_MDS_OP_LOOKUPSNAP || - op == CEPH_MDS_OP_MKSNAP) { - ldout(cct, 10) << " faking snap lookup weirdness" << dendl; - // fake it for snap lookup - vinodeno_t vino = ist.vino; - vino.snapid = CEPH_SNAPDIR; - assert(inode_map.count(vino)); - diri = inode_map[vino]; - - string dname = request->path.last_dentry(); - - LeaseStat dlease; - dlease.duration_ms = 0; - - if (in) { - Dir *dir = diri->open_dir(); - insert_dentry_inode(dir, dname, &dlease, in, request->sent_stamp, session); - } else { - if (diri->dir && diri->dir->dentries.count(dname)) { - Dentry *dn = diri->dir->dentries[dname]; - if (dn->inode) - unlink(dn, true, true); // keep dir, dentry - } - } - } - - if (in) { - if (op == CEPH_MDS_OP_READDIR || - op == CEPH_MDS_OP_LSSNAP) { - insert_readdir_results(request, session, in); - } else if (op == CEPH_MDS_OP_LOOKUPNAME) { - // hack: return parent inode instead - in = diri; - } - - if (request->dentry() == NULL && in != request->inode()) { - // pin the target inode if its parent dentry is not pinned - request->set_other_inode(in); - } - } - - if (realm) - put_snap_realm(realm); - - request->target = in; - return in; -} - -// ------- - -mds_rank_t Client::choose_target_mds(MetaRequest *req, Inode** phash_diri) -{ - mds_rank_t mds = MDS_RANK_NONE; - __u32 hash = 0; - bool is_hash = false; - - Inode *in = NULL; - Dentry *de = NULL; - Cap *cap = NULL; - - if (req->resend_mds >= 0) { - mds = req->resend_mds; - req->resend_mds = -1; - ldout(cct, 10) << "choose_target_mds resend_mds specified as mds." << mds << dendl; - goto out; - } - - if (cct->_conf->client_use_random_mds) - goto random_mds; - - in = req->inode(); - de = req->dentry(); - if (in) { - ldout(cct, 20) << "choose_target_mds starting with req->inode " << *in << dendl; - if (req->path.depth()) { - hash = in->hash_dentry_name(req->path[0]); - ldout(cct, 20) << "choose_target_mds inode dir hash is " << (int)in->dir_layout.dl_dir_hash - << " on " << req->path[0] - << " => " << hash << dendl; - is_hash = true; - } - } else if (de) { - if (de->inode) { - in = de->inode.get(); - ldout(cct, 20) << "choose_target_mds starting with req->dentry inode " << *in << dendl; - } else { - in = de->dir->parent_inode; - hash = in->hash_dentry_name(de->name); - ldout(cct, 20) << "choose_target_mds dentry dir hash is " << (int)in->dir_layout.dl_dir_hash - << " on " << de->name - << " => " << hash << dendl; - is_hash = true; - } - } - if (in) { - if (in->snapid != CEPH_NOSNAP) { - ldout(cct, 10) << "choose_target_mds " << *in << " is snapped, using nonsnap parent" << dendl; - while (in->snapid != CEPH_NOSNAP) { - if (in->snapid == CEPH_SNAPDIR) - in = in->snapdir_parent.get(); - else if (!in->dn_set.empty()) - /* In most cases there will only be one dentry, so getting it - * will be the correct action. If there are multiple hard links, - * I think the MDS should be able to redirect as needed*/ - in = in->get_first_parent()->dir->parent_inode; - else { - ldout(cct, 10) << "got unlinked inode, can't look at parent" << dendl; - break; - } - } - is_hash = false; - } - - ldout(cct, 20) << "choose_target_mds " << *in << " is_hash=" << is_hash - << " hash=" << hash << dendl; - - if (is_hash && S_ISDIR(in->mode) && !in->fragmap.empty()) { - frag_t fg = in->dirfragtree[hash]; - if (in->fragmap.count(fg)) { - mds = in->fragmap[fg]; - if (phash_diri) - *phash_diri = in; - ldout(cct, 10) << "choose_target_mds from dirfragtree hash" << dendl; - goto out; - } - } - - if (req->auth_is_best()) - cap = in->auth_cap; - if (!cap && !in->caps.empty()) - cap = in->caps.begin()->second; - if (!cap) - goto random_mds; - mds = cap->session->mds_num; - ldout(cct, 10) << "choose_target_mds from caps on inode " << *in << dendl; - - goto out; - } - -random_mds: - if (mds < 0) { - mds = _get_random_up_mds(); - ldout(cct, 10) << "did not get mds through better means, so chose random mds " << mds << dendl; - } - -out: - ldout(cct, 20) << "mds is " << mds << dendl; - return mds; -} - - -void Client::connect_mds_targets(mds_rank_t mds) -{ - ldout(cct, 10) << "connect_mds_targets for mds." << mds << dendl; - assert(mds_sessions.count(mds)); - const MDSMap::mds_info_t& info = mdsmap->get_mds_info(mds); - for (set::const_iterator q = info.export_targets.begin(); - q != info.export_targets.end(); - ++q) { - if (mds_sessions.count(*q) == 0 && - mdsmap->is_clientreplay_or_active_or_stopping(*q)) { - ldout(cct, 10) << "check_mds_sessions opening mds." << mds - << " export target mds." << *q << dendl; - _open_mds_session(*q); - } - } -} - -void Client::dump_mds_sessions(Formatter *f) -{ - f->dump_int("id", get_nodeid().v); - f->open_array_section("sessions"); - for (map::const_iterator p = mds_sessions.begin(); p != mds_sessions.end(); ++p) { - f->open_object_section("session"); - p->second->dump(f); - f->close_section(); - } - f->close_section(); - f->dump_int("mdsmap_epoch", mdsmap->get_epoch()); -} -void Client::dump_mds_requests(Formatter *f) -{ - for (map::iterator p = mds_requests.begin(); - p != mds_requests.end(); - ++p) { - f->open_object_section("request"); - p->second->dump(f); - f->close_section(); - } -} - -int Client::verify_reply_trace(int r, - MetaRequest *request, MClientReply *reply, - InodeRef *ptarget, bool *pcreated, - const UserPerm& perms) -{ - // check whether this request actually did the create, and set created flag - bufferlist extra_bl; - inodeno_t created_ino; - bool got_created_ino = false; - ceph::unordered_map::iterator p; - - extra_bl.claim(reply->get_extra_bl()); - if (extra_bl.length() >= 8) { - // if the extra bufferlist has a buffer, we assume its the created inode - // and that this request to create succeeded in actually creating - // the inode (won the race with other create requests) - ::decode(created_ino, extra_bl); - got_created_ino = true; - ldout(cct, 10) << "make_request created ino " << created_ino << dendl; - } - - if (pcreated) - *pcreated = got_created_ino; - - if (request->target) { - *ptarget = request->target; - ldout(cct, 20) << "make_request target is " << *ptarget->get() << dendl; - } else { - if (got_created_ino && (p = inode_map.find(vinodeno_t(created_ino, CEPH_NOSNAP))) != inode_map.end()) { - (*ptarget) = p->second; - ldout(cct, 20) << "make_request created, target is " << *ptarget->get() << dendl; - } else { - // we got a traceless reply, and need to look up what we just - // created. for now, do this by name. someday, do this by the - // ino... which we know! FIXME. - InodeRef target; - Dentry *d = request->dentry(); - if (d) { - if (d->dir) { - ldout(cct, 10) << "make_request got traceless reply, looking up #" - << d->dir->parent_inode->ino << "/" << d->name - << " got_ino " << got_created_ino - << " ino " << created_ino - << dendl; - r = _do_lookup(d->dir->parent_inode, d->name, request->regetattr_mask, - &target, perms); - } else { - // if the dentry is not linked, just do our best. see #5021. - assert(0 == "how did this happen? i want logs!"); - } - } else { - Inode *in = request->inode(); - ldout(cct, 10) << "make_request got traceless reply, forcing getattr on #" - << in->ino << dendl; - r = _getattr(in, request->regetattr_mask, perms, true); - target = in; - } - if (r >= 0) { - // verify ino returned in reply and trace_dist are the same - if (got_created_ino && - created_ino.val != target->ino.val) { - ldout(cct, 5) << "create got ino " << created_ino << " but then failed on lookup; EINTR?" << dendl; - r = -EINTR; - } - if (ptarget) - ptarget->swap(target); - } - } - } - - return r; -} - - -/** - * make a request - * - * Blocking helper to make an MDS request. - * - * If the ptarget flag is set, behavior changes slightly: the caller - * expects to get a pointer to the inode we are creating or operating - * on. As a result, we will follow up any traceless mutation reply - * with a getattr or lookup to transparently handle a traceless reply - * from the MDS (as when the MDS restarts and the client has to replay - * a request). - * - * @param request the MetaRequest to execute - * @param perms The user uid/gid to execute as (eventually, full group lists?) - * @param ptarget [optional] address to store a pointer to the target inode we want to create or operate on - * @param pcreated [optional; required if ptarget] where to store a bool of whether our create atomically created a file - * @param use_mds [optional] prefer a specific mds (-1 for default) - * @param pdirbl [optional; disallowed if ptarget] where to pass extra reply payload to the caller - */ -int Client::make_request(MetaRequest *request, - const UserPerm& perms, - InodeRef *ptarget, bool *pcreated, - mds_rank_t use_mds, - bufferlist *pdirbl) -{ - int r = 0; - - // assign a unique tid - ceph_tid_t tid = ++last_tid; - request->set_tid(tid); - - // and timestamp - request->op_stamp = ceph_clock_now(); - - // make note - mds_requests[tid] = request->get(); - if (oldest_tid == 0 && request->get_op() != CEPH_MDS_OP_SETFILELOCK) - oldest_tid = tid; - - request->set_caller_perms(perms); - - if (cct->_conf->client_inject_fixed_oldest_tid) { - ldout(cct, 20) << __func__ << " injecting fixed oldest_client_tid(1)" << dendl; - request->set_oldest_client_tid(1); - } else { - request->set_oldest_client_tid(oldest_tid); - } - - // hack target mds? - if (use_mds >= 0) - request->resend_mds = use_mds; - - while (1) { - if (request->aborted()) - break; - - if (blacklisted) { - request->abort(-EBLACKLISTED); - break; - } - - // set up wait cond - Cond caller_cond; - request->caller_cond = &caller_cond; - - // choose mds - Inode *hash_diri = NULL; - mds_rank_t mds = choose_target_mds(request, &hash_diri); - int mds_state = (mds == MDS_RANK_NONE) ? MDSMap::STATE_NULL : mdsmap->get_state(mds); - if (mds_state != MDSMap::STATE_ACTIVE && mds_state != MDSMap::STATE_STOPPING) { - if (mds_state == MDSMap::STATE_NULL && mds >= mdsmap->get_max_mds()) { - if (hash_diri) { - ldout(cct, 10) << " target mds." << mds << " has stopped, remove it from fragmap" << dendl; - _fragmap_remove_stopped_mds(hash_diri, mds); - } else { - ldout(cct, 10) << " target mds." << mds << " has stopped, trying a random mds" << dendl; - request->resend_mds = _get_random_up_mds(); - } - } else { - ldout(cct, 10) << " target mds." << mds << " not active, waiting for new mdsmap" << dendl; - wait_on_list(waiting_for_mdsmap); - } - continue; - } - - // open a session? - MetaSession *session = NULL; - if (!have_open_session(mds)) { - session = _get_or_open_mds_session(mds); - - // wait - if (session->state == MetaSession::STATE_OPENING) { - ldout(cct, 10) << "waiting for session to mds." << mds << " to open" << dendl; - wait_on_context_list(session->waiting_for_open); - // Abort requests on REJECT from MDS - if (rejected_by_mds.count(mds)) { - request->abort(-EPERM); - break; - } - continue; - } - - if (!have_open_session(mds)) - continue; - } else { - session = mds_sessions[mds]; - } - - // send request. - send_request(request, session); - - // wait for signal - ldout(cct, 20) << "awaiting reply|forward|kick on " << &caller_cond << dendl; - request->kick = false; - while (!request->reply && // reply - request->resend_mds < 0 && // forward - !request->kick) - caller_cond.Wait(client_lock); - request->caller_cond = NULL; - - // did we get a reply? - if (request->reply) - break; - } - - if (!request->reply) { - assert(request->aborted()); - assert(!request->got_unsafe); - r = request->get_abort_code(); - request->item.remove_myself(); - unregister_request(request); - put_request(request); // ours - return r; - } - - // got it! - MClientReply *reply = request->reply; - request->reply = NULL; - r = reply->get_result(); - if (r >= 0) - request->success = true; - - // kick dispatcher (we've got it!) - assert(request->dispatch_cond); - request->dispatch_cond->Signal(); - ldout(cct, 20) << "sendrecv kickback on tid " << tid << " " << request->dispatch_cond << dendl; - request->dispatch_cond = 0; - - if (r >= 0 && ptarget) - r = verify_reply_trace(r, request, reply, ptarget, pcreated, perms); - - if (pdirbl) - pdirbl->claim(reply->get_extra_bl()); - - // -- log times -- - utime_t lat = ceph_clock_now(); - lat -= request->sent_stamp; - ldout(cct, 20) << "lat " << lat << dendl; - logger->tinc(l_c_lat, lat); - logger->tinc(l_c_reply, lat); - - put_request(request); - - reply->put(); - return r; -} - -void Client::unregister_request(MetaRequest *req) -{ - mds_requests.erase(req->tid); - if (req->tid == oldest_tid) { - map::iterator p = mds_requests.upper_bound(oldest_tid); - while (true) { - if (p == mds_requests.end()) { - oldest_tid = 0; - break; - } - if (p->second->get_op() != CEPH_MDS_OP_SETFILELOCK) { - oldest_tid = p->first; - break; - } - ++p; - } - } - put_request(req); -} - -void Client::put_request(MetaRequest *request) -{ - if (request->_put()) { - int op = -1; - if (request->success) - op = request->get_op(); - InodeRef other_in; - request->take_other_inode(&other_in); - delete request; - - if (other_in && - (op == CEPH_MDS_OP_RMDIR || - op == CEPH_MDS_OP_RENAME || - op == CEPH_MDS_OP_RMSNAP)) { - _try_to_trim_inode(other_in.get(), false); - } - } -} - -int Client::encode_inode_release(Inode *in, MetaRequest *req, - mds_rank_t mds, int drop, - int unless, int force) -{ - ldout(cct, 20) << "encode_inode_release enter(in:" << *in << ", req:" << req - << " mds:" << mds << ", drop:" << drop << ", unless:" << unless - << ", have:" << ", force:" << force << ")" << dendl; - int released = 0; - if (in->caps.count(mds)) { - Cap *caps = in->caps[mds]; - drop &= ~(in->dirty_caps | get_caps_used(in)); - if ((drop & caps->issued) && - !(unless & caps->issued)) { - ldout(cct, 25) << "Dropping caps. Initial " << ccap_string(caps->issued) << dendl; - caps->issued &= ~drop; - caps->implemented &= ~drop; - released = 1; - ldout(cct, 25) << "Now have: " << ccap_string(caps->issued) << dendl; - } else { - released = force; - } - if (released) { - ceph_mds_request_release rel; - rel.ino = in->ino; - rel.cap_id = caps->cap_id; - rel.seq = caps->seq; - rel.issue_seq = caps->issue_seq; - rel.mseq = caps->mseq; - rel.caps = caps->implemented; - rel.wanted = caps->wanted; - rel.dname_len = 0; - rel.dname_seq = 0; - req->cap_releases.push_back(MClientRequest::Release(rel,"")); - } - } - ldout(cct, 25) << "encode_inode_release exit(in:" << *in << ") released:" - << released << dendl; - return released; -} - -void Client::encode_dentry_release(Dentry *dn, MetaRequest *req, - mds_rank_t mds, int drop, int unless) -{ - ldout(cct, 20) << "encode_dentry_release enter(dn:" - << dn << ")" << dendl; - int released = 0; - if (dn->dir) - released = encode_inode_release(dn->dir->parent_inode, req, - mds, drop, unless, 1); - if (released && dn->lease_mds == mds) { - ldout(cct, 25) << "preemptively releasing dn to mds" << dendl; - MClientRequest::Release& rel = req->cap_releases.back(); - rel.item.dname_len = dn->name.length(); - rel.item.dname_seq = dn->lease_seq; - rel.dname = dn->name; - } - ldout(cct, 25) << "encode_dentry_release exit(dn:" - << dn << ")" << dendl; -} - - -/* - * This requires the MClientRequest *request member to be set. - * It will error out horribly without one. - * Additionally, if you set any *drop member, you'd better have - * set the corresponding dentry! - */ -void Client::encode_cap_releases(MetaRequest *req, mds_rank_t mds) -{ - ldout(cct, 20) << "encode_cap_releases enter (req: " - << req << ", mds: " << mds << ")" << dendl; - if (req->inode_drop && req->inode()) - encode_inode_release(req->inode(), req, - mds, req->inode_drop, - req->inode_unless); - - if (req->old_inode_drop && req->old_inode()) - encode_inode_release(req->old_inode(), req, - mds, req->old_inode_drop, - req->old_inode_unless); - if (req->other_inode_drop && req->other_inode()) - encode_inode_release(req->other_inode(), req, - mds, req->other_inode_drop, - req->other_inode_unless); - - if (req->dentry_drop && req->dentry()) - encode_dentry_release(req->dentry(), req, - mds, req->dentry_drop, - req->dentry_unless); - - if (req->old_dentry_drop && req->old_dentry()) - encode_dentry_release(req->old_dentry(), req, - mds, req->old_dentry_drop, - req->old_dentry_unless); - ldout(cct, 25) << "encode_cap_releases exit (req: " - << req << ", mds " << mds <state == MetaSession::STATE_OPEN || - mds_sessions[mds]->state == MetaSession::STATE_STALE); -} - -MetaSession *Client::_get_mds_session(mds_rank_t mds, Connection *con) -{ - if (mds_sessions.count(mds) == 0) - return NULL; - MetaSession *s = mds_sessions[mds]; - if (s->con != con) - return NULL; - return s; -} - -MetaSession *Client::_get_or_open_mds_session(mds_rank_t mds) -{ - if (mds_sessions.count(mds)) - return mds_sessions[mds]; - return _open_mds_session(mds); -} - -/** - * Populate a map of strings with client-identifying metadata, - * such as the hostname. Call this once at initialization. - */ -void Client::populate_metadata(const std::string &mount_root) -{ - // Hostname - struct utsname u; - int r = uname(&u); - if (r >= 0) { - metadata["hostname"] = u.nodename; - ldout(cct, 20) << __func__ << " read hostname '" << u.nodename << "'" << dendl; - } else { - ldout(cct, 1) << __func__ << " failed to read hostname (" << cpp_strerror(r) << ")" << dendl; - } - - metadata["pid"] = stringify(getpid()); - - // Ceph entity id (the '0' in "client.0") - metadata["entity_id"] = cct->_conf->name.get_id(); - - // Our mount position - if (!mount_root.empty()) { - metadata["root"] = mount_root; - } - - // Ceph version - metadata["ceph_version"] = pretty_version_to_str(); - metadata["ceph_sha1"] = git_version_to_str(); - - // Apply any metadata from the user's configured overrides - std::vector tokens; - get_str_vec(cct->_conf->client_metadata, ",", tokens); - for (const auto &i : tokens) { - auto eqpos = i.find("="); - // Throw out anything that isn't of the form "=" - if (eqpos == 0 || eqpos == std::string::npos || eqpos == i.size()) { - lderr(cct) << "Invalid metadata keyval pair: '" << i << "'" << dendl; - continue; - } - metadata[i.substr(0, eqpos)] = i.substr(eqpos + 1); - } -} - -/** - * Optionally add or override client metadata fields. - */ -void Client::update_metadata(std::string const &k, std::string const &v) -{ - Mutex::Locker l(client_lock); - assert(initialized); - - if (metadata.count(k)) { - ldout(cct, 1) << __func__ << " warning, overriding metadata field '" << k - << "' from '" << metadata[k] << "' to '" << v << "'" << dendl; - } - - metadata[k] = v; -} - -MetaSession *Client::_open_mds_session(mds_rank_t mds) -{ - ldout(cct, 10) << "_open_mds_session mds." << mds << dendl; - assert(mds_sessions.count(mds) == 0); - MetaSession *session = new MetaSession; - session->mds_num = mds; - session->seq = 0; - session->inst = mdsmap->get_inst(mds); - session->con = messenger->get_connection(session->inst); - session->state = MetaSession::STATE_OPENING; - session->mds_state = MDSMap::STATE_NULL; - mds_sessions[mds] = session; - - // Maybe skip sending a request to open if this MDS daemon - // has previously sent us a REJECT. - if (rejected_by_mds.count(mds)) { - if (rejected_by_mds[mds] == session->inst) { - ldout(cct, 4) << "_open_mds_session mds." << mds << " skipping " - "because we were rejected" << dendl; - return session; - } else { - ldout(cct, 4) << "_open_mds_session mds." << mds << " old inst " - "rejected us, trying with new inst" << dendl; - rejected_by_mds.erase(mds); - } - } - - MClientSession *m = new MClientSession(CEPH_SESSION_REQUEST_OPEN); - m->client_meta = metadata; - session->con->send_message(m); - return session; -} - -void Client::_close_mds_session(MetaSession *s) -{ - ldout(cct, 2) << "_close_mds_session mds." << s->mds_num << " seq " << s->seq << dendl; - s->state = MetaSession::STATE_CLOSING; - s->con->send_message(new MClientSession(CEPH_SESSION_REQUEST_CLOSE, s->seq)); -} - -void Client::_closed_mds_session(MetaSession *s) -{ - s->state = MetaSession::STATE_CLOSED; - s->con->mark_down(); - signal_context_list(s->waiting_for_open); - mount_cond.Signal(); - remove_session_caps(s); - kick_requests_closed(s); - mds_sessions.erase(s->mds_num); - delete s; -} - -void Client::handle_client_session(MClientSession *m) -{ - mds_rank_t from = mds_rank_t(m->get_source().num()); - ldout(cct, 10) << "handle_client_session " << *m << " from mds." << from << dendl; - - MetaSession *session = _get_mds_session(from, m->get_connection().get()); - if (!session) { - ldout(cct, 10) << " discarding session message from sessionless mds " << m->get_source_inst() << dendl; - m->put(); - return; - } - - switch (m->get_op()) { - case CEPH_SESSION_OPEN: - renew_caps(session); - session->state = MetaSession::STATE_OPEN; - if (unmounting) - mount_cond.Signal(); - else - connect_mds_targets(from); - signal_context_list(session->waiting_for_open); - break; - - case CEPH_SESSION_CLOSE: - _closed_mds_session(session); - break; - - case CEPH_SESSION_RENEWCAPS: - if (session->cap_renew_seq == m->get_seq()) { - session->cap_ttl = - session->last_cap_renew_request + mdsmap->get_session_timeout(); - wake_inode_waiters(session); - } - break; - - case CEPH_SESSION_STALE: - renew_caps(session); - break; - - case CEPH_SESSION_RECALL_STATE: - trim_caps(session, m->get_max_caps()); - break; - - case CEPH_SESSION_FLUSHMSG: - session->con->send_message(new MClientSession(CEPH_SESSION_FLUSHMSG_ACK, m->get_seq())); - break; - - case CEPH_SESSION_FORCE_RO: - force_session_readonly(session); - break; - - case CEPH_SESSION_REJECT: - rejected_by_mds[session->mds_num] = session->inst; - _closed_mds_session(session); - - break; - - default: - ceph_abort(); - } - - m->put(); -} - -bool Client::_any_stale_sessions() const -{ - assert(client_lock.is_locked_by_me()); - - for (const auto &i : mds_sessions) { - if (i.second->state == MetaSession::STATE_STALE) { - return true; - } - } - - return false; -} - -void Client::_kick_stale_sessions() -{ - ldout(cct, 1) << "kick_stale_sessions" << dendl; - - for (map::iterator p = mds_sessions.begin(); - p != mds_sessions.end(); ) { - MetaSession *s = p->second; - ++p; - if (s->state == MetaSession::STATE_STALE) - _closed_mds_session(s); - } -} - -void Client::send_request(MetaRequest *request, MetaSession *session, - bool drop_cap_releases) -{ - // make the request - mds_rank_t mds = session->mds_num; - ldout(cct, 10) << "send_request rebuilding request " << request->get_tid() - << " for mds." << mds << dendl; - MClientRequest *r = build_client_request(request); - if (request->dentry()) { - r->set_dentry_wanted(); - } - if (request->got_unsafe) { - r->set_replayed_op(); - if (request->target) - r->head.ino = request->target->ino; - } else { - encode_cap_releases(request, mds); - if (drop_cap_releases) // we haven't send cap reconnect yet, drop cap releases - request->cap_releases.clear(); - else - r->releases.swap(request->cap_releases); - } - r->set_mdsmap_epoch(mdsmap->get_epoch()); - if (r->head.op == CEPH_MDS_OP_SETXATTR) { - objecter->with_osdmap([r](const OSDMap& o) { - r->set_osdmap_epoch(o.get_epoch()); - }); - } - - if (request->mds == -1) { - request->sent_stamp = ceph_clock_now(); - ldout(cct, 20) << "send_request set sent_stamp to " << request->sent_stamp << dendl; - } - request->mds = mds; - - Inode *in = request->inode(); - if (in && in->caps.count(mds)) - request->sent_on_mseq = in->caps[mds]->mseq; - - session->requests.push_back(&request->item); - - ldout(cct, 10) << "send_request " << *r << " to mds." << mds << dendl; - session->con->send_message(r); -} - -MClientRequest* Client::build_client_request(MetaRequest *request) -{ - MClientRequest *req = new MClientRequest(request->get_op()); - req->set_tid(request->tid); - req->set_stamp(request->op_stamp); - memcpy(&req->head, &request->head, sizeof(ceph_mds_request_head)); - - // if the filepath's haven't been set, set them! - if (request->path.empty()) { - Inode *in = request->inode(); - Dentry *de = request->dentry(); - if (in) - in->make_nosnap_relative_path(request->path); - else if (de) { - if (de->inode) - de->inode->make_nosnap_relative_path(request->path); - else if (de->dir) { - de->dir->parent_inode->make_nosnap_relative_path(request->path); - request->path.push_dentry(de->name); - } - else ldout(cct, 1) << "Warning -- unable to construct a filepath!" - << " No path, inode, or appropriately-endowed dentry given!" - << dendl; - } else ldout(cct, 1) << "Warning -- unable to construct a filepath!" - << " No path, inode, or dentry given!" - << dendl; - } - req->set_filepath(request->get_filepath()); - req->set_filepath2(request->get_filepath2()); - req->set_data(request->data); - req->set_retry_attempt(request->retry_attempt++); - req->head.num_fwd = request->num_fwd; - const gid_t *_gids; - int gid_count = request->perms.get_gids(&_gids); - req->set_gid_list(gid_count, _gids); - return req; -} - - - -void Client::handle_client_request_forward(MClientRequestForward *fwd) -{ - mds_rank_t mds = mds_rank_t(fwd->get_source().num()); - MetaSession *session = _get_mds_session(mds, fwd->get_connection().get()); - if (!session) { - fwd->put(); - return; - } - ceph_tid_t tid = fwd->get_tid(); - - if (mds_requests.count(tid) == 0) { - ldout(cct, 10) << "handle_client_request_forward no pending request on tid " << tid << dendl; - fwd->put(); - return; - } - - MetaRequest *request = mds_requests[tid]; - assert(request); - - // reset retry counter - request->retry_attempt = 0; - - // request not forwarded, or dest mds has no session. - // resend. - ldout(cct, 10) << "handle_client_request tid " << tid - << " fwd " << fwd->get_num_fwd() - << " to mds." << fwd->get_dest_mds() - << ", resending to " << fwd->get_dest_mds() - << dendl; - - request->mds = -1; - request->item.remove_myself(); - request->num_fwd = fwd->get_num_fwd(); - request->resend_mds = fwd->get_dest_mds(); - request->caller_cond->Signal(); - - fwd->put(); -} - -bool Client::is_dir_operation(MetaRequest *req) -{ - int op = req->get_op(); - if (op == CEPH_MDS_OP_MKNOD || op == CEPH_MDS_OP_LINK || - op == CEPH_MDS_OP_UNLINK || op == CEPH_MDS_OP_RENAME || - op == CEPH_MDS_OP_MKDIR || op == CEPH_MDS_OP_RMDIR || - op == CEPH_MDS_OP_SYMLINK || op == CEPH_MDS_OP_CREATE) - return true; - return false; -} - -void Client::handle_client_reply(MClientReply *reply) -{ - mds_rank_t mds_num = mds_rank_t(reply->get_source().num()); - MetaSession *session = _get_mds_session(mds_num, reply->get_connection().get()); - if (!session) { - reply->put(); - return; - } - - ceph_tid_t tid = reply->get_tid(); - bool is_safe = reply->is_safe(); - - if (mds_requests.count(tid) == 0) { - lderr(cct) << "handle_client_reply no pending request on tid " << tid - << " safe is:" << is_safe << dendl; - reply->put(); - return; - } - MetaRequest *request = mds_requests.at(tid); - - ldout(cct, 20) << "handle_client_reply got a reply. Safe:" << is_safe - << " tid " << tid << dendl; - - if (request->got_unsafe && !is_safe) { - //duplicate response - ldout(cct, 0) << "got a duplicate reply on tid " << tid << " from mds " - << mds_num << " safe:" << is_safe << dendl; - reply->put(); - return; - } - - if (-ESTALE == reply->get_result()) { // see if we can get to proper MDS - ldout(cct, 20) << "got ESTALE on tid " << request->tid - << " from mds." << request->mds << dendl; - request->send_to_auth = true; - request->resend_mds = choose_target_mds(request); - Inode *in = request->inode(); - if (request->resend_mds >= 0 && - request->resend_mds == request->mds && - (in == NULL || - in->caps.count(request->resend_mds) == 0 || - request->sent_on_mseq == in->caps[request->resend_mds]->mseq)) { - // have to return ESTALE - } else { - request->caller_cond->Signal(); - reply->put(); - return; - } - ldout(cct, 20) << "have to return ESTALE" << dendl; - } - - assert(request->reply == NULL); - request->reply = reply; - insert_trace(request, session); - - // Handle unsafe reply - if (!is_safe) { - request->got_unsafe = true; - session->unsafe_requests.push_back(&request->unsafe_item); - if (is_dir_operation(request)) { - Inode *dir = request->inode(); - assert(dir); - dir->unsafe_ops.push_back(&request->unsafe_dir_item); - } - if (request->target) { - InodeRef &in = request->target; - in->unsafe_ops.push_back(&request->unsafe_target_item); - } - } - - // Only signal the caller once (on the first reply): - // Either its an unsafe reply, or its a safe reply and no unsafe reply was sent. - if (!is_safe || !request->got_unsafe) { - Cond cond; - request->dispatch_cond = &cond; - - // wake up waiter - ldout(cct, 20) << "handle_client_reply signalling caller " << (void*)request->caller_cond << dendl; - request->caller_cond->Signal(); - - // wake for kick back - while (request->dispatch_cond) { - ldout(cct, 20) << "handle_client_reply awaiting kickback on tid " << tid << " " << &cond << dendl; - cond.Wait(client_lock); - } - } - - if (is_safe) { - // the filesystem change is committed to disk - // we're done, clean up - if (request->got_unsafe) { - request->unsafe_item.remove_myself(); - request->unsafe_dir_item.remove_myself(); - request->unsafe_target_item.remove_myself(); - signal_cond_list(request->waitfor_safe); - } - request->item.remove_myself(); - unregister_request(request); - } - if (unmounting) - mount_cond.Signal(); -} - -void Client::_handle_full_flag(int64_t pool) -{ - ldout(cct, 1) << __func__ << ": FULL: cancelling outstanding operations " - << "on " << pool << dendl; - // Cancel all outstanding ops in this pool with -ENOSPC: it is necessary - // to do this rather than blocking, because otherwise when we fill up we - // potentially lock caps forever on files with dirty pages, and we need - // to be able to release those caps to the MDS so that it can delete files - // and free up space. - epoch_t cancelled_epoch = objecter->op_cancel_writes(-ENOSPC, pool); - - // For all inodes with layouts in this pool and a pending flush write op - // (i.e. one of the ones we will cancel), we've got to purge_set their data - // from ObjectCacher so that it doesn't re-issue the write in response to - // the ENOSPC error. - // Fortunately since we're cancelling everything in a given pool, we don't - // need to know which ops belong to which ObjectSet, we can just blow all - // the un-flushed cached data away and mark any dirty inodes' async_err - // field with -ENOSPC as long as we're sure all the ops we cancelled were - // affecting this pool, and all the objectsets we're purging were also - // in this pool. - for (unordered_map::iterator i = inode_map.begin(); - i != inode_map.end(); ++i) - { - Inode *inode = i->second; - if (inode->oset.dirty_or_tx - && (pool == -1 || inode->layout.pool_id == pool)) { - ldout(cct, 4) << __func__ << ": FULL: inode 0x" << std::hex << i->first << std::dec - << " has dirty objects, purging and setting ENOSPC" << dendl; - objectcacher->purge_set(&inode->oset); - inode->set_async_err(-ENOSPC); - } - } - - if (cancelled_epoch != (epoch_t)-1) { - set_cap_epoch_barrier(cancelled_epoch); - } -} - -void Client::handle_osd_map(MOSDMap *m) -{ - std::set new_blacklists; - objecter->consume_blacklist_events(&new_blacklists); - - const auto myaddr = messenger->get_myaddr(); - if (!blacklisted && new_blacklists.count(myaddr)) { - auto epoch = objecter->with_osdmap([](const OSDMap &o){ - return o.get_epoch(); - }); - lderr(cct) << "I was blacklisted at osd epoch " << epoch << dendl; - blacklisted = true; - for (std::map::iterator p = mds_requests.begin(); - p != mds_requests.end(); ) { - auto req = p->second; - ++p; - req->abort(-EBLACKLISTED); - if (req->caller_cond) { - req->kick = true; - req->caller_cond->Signal(); - } - } - - // Progress aborts on any requests that were on this waitlist. Any - // requests that were on a waiting_for_open session waitlist - // will get kicked during close session below. - signal_cond_list(waiting_for_mdsmap); - - // Force-close all sessions: assume this is not abandoning any state - // on the MDS side because the MDS will have seen the blacklist too. - while(!mds_sessions.empty()) { - auto i = mds_sessions.begin(); - auto session = i->second; - _closed_mds_session(session); - } - - // Since we know all our OSD ops will fail, cancel them all preemtively, - // so that on an unhealthy cluster we can umount promptly even if e.g. - // some PGs were inaccessible. - objecter->op_cancel_writes(-EBLACKLISTED); - - } else if (blacklisted) { - // Handle case where we were blacklisted but no longer are - blacklisted = objecter->with_osdmap([myaddr](const OSDMap &o){ - return o.is_blacklisted(myaddr);}); - } - - if (objecter->osdmap_full_flag()) { - _handle_full_flag(-1); - } else { - // Accumulate local list of full pools so that I can drop - // the objecter lock before re-entering objecter in - // cancel_writes - std::vector full_pools; - - objecter->with_osdmap([&full_pools](const OSDMap &o) { - for (const auto& kv : o.get_pools()) { - if (kv.second.has_flag(pg_pool_t::FLAG_FULL)) { - full_pools.push_back(kv.first); - } - } - }); - - for (auto p : full_pools) - _handle_full_flag(p); - - // Subscribe to subsequent maps to watch for the full flag going - // away. For the global full flag objecter does this for us, but - // it pays no attention to the per-pool full flag so in this branch - // we do it ourselves. - if (!full_pools.empty()) { - objecter->maybe_request_map(); - } - } - - m->put(); -} - - -// ------------------------ -// incoming messages - - -bool Client::ms_dispatch(Message *m) -{ - Mutex::Locker l(client_lock); - if (!initialized) { - ldout(cct, 10) << "inactive, discarding " << *m << dendl; - m->put(); - return true; - } - - switch (m->get_type()) { - // mounting and mds sessions - case CEPH_MSG_MDS_MAP: - handle_mds_map(static_cast(m)); - break; - case CEPH_MSG_FS_MAP: - handle_fs_map(static_cast(m)); - break; - case CEPH_MSG_FS_MAP_USER: - handle_fs_map_user(static_cast(m)); - break; - case CEPH_MSG_CLIENT_SESSION: - handle_client_session(static_cast(m)); - break; - - case CEPH_MSG_OSD_MAP: - handle_osd_map(static_cast(m)); - break; - - // requests - case CEPH_MSG_CLIENT_REQUEST_FORWARD: - handle_client_request_forward(static_cast(m)); - break; - case CEPH_MSG_CLIENT_REPLY: - handle_client_reply(static_cast(m)); - break; - - case CEPH_MSG_CLIENT_SNAP: - handle_snap(static_cast(m)); - break; - case CEPH_MSG_CLIENT_CAPS: - handle_caps(static_cast(m)); - break; - case CEPH_MSG_CLIENT_LEASE: - handle_lease(static_cast(m)); - break; - case MSG_COMMAND_REPLY: - if (m->get_source().type() == CEPH_ENTITY_TYPE_MDS) { - handle_command_reply(static_cast(m)); - } else { - return false; - } - break; - case CEPH_MSG_CLIENT_QUOTA: - handle_quota(static_cast(m)); - break; - - default: - return false; - } - - // unmounting? - if (unmounting) { - ldout(cct, 10) << "unmounting: trim pass, size was " << lru.lru_get_size() - << "+" << inode_map.size() << dendl; - long unsigned size = lru.lru_get_size() + inode_map.size(); - trim_cache(); - if (size < lru.lru_get_size() + inode_map.size()) { - ldout(cct, 10) << "unmounting: trim pass, cache shrank, poking unmount()" << dendl; - mount_cond.Signal(); - } else { - ldout(cct, 10) << "unmounting: trim pass, size still " << lru.lru_get_size() - << "+" << inode_map.size() << dendl; - } - } - - return true; -} - -void Client::handle_fs_map(MFSMap *m) -{ - fsmap.reset(new FSMap(m->get_fsmap())); - m->put(); - - signal_cond_list(waiting_for_fsmap); - - monclient->sub_got("fsmap", fsmap->get_epoch()); -} - -void Client::handle_fs_map_user(MFSMapUser *m) -{ - fsmap_user.reset(new FSMapUser); - *fsmap_user = m->get_fsmap(); - m->put(); - - monclient->sub_got("fsmap.user", fsmap_user->get_epoch()); - signal_cond_list(waiting_for_fsmap); -} - -void Client::handle_mds_map(MMDSMap* m) -{ - if (m->get_epoch() <= mdsmap->get_epoch()) { - ldout(cct, 1) << "handle_mds_map epoch " << m->get_epoch() - << " is identical to or older than our " - << mdsmap->get_epoch() << dendl; - m->put(); - return; - } - - ldout(cct, 1) << "handle_mds_map epoch " << m->get_epoch() << dendl; - - std::unique_ptr oldmap(new MDSMap); - oldmap.swap(mdsmap); - - mdsmap->decode(m->get_encoded()); - - // Cancel any commands for missing or laggy GIDs - std::list cancel_ops; - auto &commands = command_table.get_commands(); - for (const auto &i : commands) { - auto &op = i.second; - const mds_gid_t op_mds_gid = op.mds_gid; - if (mdsmap->is_dne_gid(op_mds_gid) || mdsmap->is_laggy_gid(op_mds_gid)) { - ldout(cct, 1) << __func__ << ": cancelling command op " << i.first << dendl; - cancel_ops.push_back(i.first); - if (op.outs) { - std::ostringstream ss; - ss << "MDS " << op_mds_gid << " went away"; - *(op.outs) = ss.str(); - } - op.con->mark_down(); - if (op.on_finish) { - op.on_finish->complete(-ETIMEDOUT); - } - } - } - - for (std::list::iterator i = cancel_ops.begin(); - i != cancel_ops.end(); ++i) { - command_table.erase(*i); - } - - // reset session - for (map::iterator p = mds_sessions.begin(); - p != mds_sessions.end(); ) { - mds_rank_t mds = p->first; - MetaSession *session = p->second; - ++p; - - int oldstate = oldmap->get_state(mds); - int newstate = mdsmap->get_state(mds); - if (!mdsmap->is_up(mds)) { - session->con->mark_down(); - } else if (mdsmap->get_inst(mds) != session->inst) { - session->con->mark_down(); - session->inst = mdsmap->get_inst(mds); - // When new MDS starts to take over, notify kernel to trim unused entries - // in its dcache/icache. Hopefully, the kernel will release some unused - // inodes before the new MDS enters reconnect state. - trim_cache_for_reconnect(session); - } else if (oldstate == newstate) - continue; // no change - - session->mds_state = newstate; - if (newstate == MDSMap::STATE_RECONNECT) { - session->con = messenger->get_connection(session->inst); - send_reconnect(session); - } else if (newstate >= MDSMap::STATE_ACTIVE) { - if (oldstate < MDSMap::STATE_ACTIVE) { - // kick new requests - kick_requests(session); - kick_flushing_caps(session); - signal_context_list(session->waiting_for_open); - kick_maxsize_requests(session); - wake_inode_waiters(session); - } - connect_mds_targets(mds); - } else if (newstate == MDSMap::STATE_NULL && - mds >= mdsmap->get_max_mds()) { - _closed_mds_session(session); - } - } - - // kick any waiting threads - signal_cond_list(waiting_for_mdsmap); - - m->put(); - - monclient->sub_got("mdsmap", mdsmap->get_epoch()); -} - -void Client::send_reconnect(MetaSession *session) -{ - mds_rank_t mds = session->mds_num; - ldout(cct, 10) << "send_reconnect to mds." << mds << dendl; - - // trim unused caps to reduce MDS's cache rejoin time - trim_cache_for_reconnect(session); - - session->readonly = false; - - if (session->release) { - session->release->put(); - session->release = NULL; - } - - // reset my cap seq number - session->seq = 0; - //connect to the mds' offload targets - connect_mds_targets(mds); - //make sure unsafe requests get saved - resend_unsafe_requests(session); - - MClientReconnect *m = new MClientReconnect; - - // i have an open session. - ceph::unordered_set did_snaprealm; - for (ceph::unordered_map::iterator p = inode_map.begin(); - p != inode_map.end(); - ++p) { - Inode *in = p->second; - if (in->caps.count(mds)) { - ldout(cct, 10) << " caps on " << p->first - << " " << ccap_string(in->caps[mds]->issued) - << " wants " << ccap_string(in->caps_wanted()) - << dendl; - filepath path; - in->make_long_path(path); - ldout(cct, 10) << " path " << path << dendl; - - bufferlist flockbl; - _encode_filelocks(in, flockbl); - - Cap *cap = in->caps[mds]; - cap->seq = 0; // reset seq. - cap->issue_seq = 0; // reset seq. - cap->mseq = 0; // reset seq. - cap->issued = cap->implemented; - - snapid_t snap_follows = 0; - if (!in->cap_snaps.empty()) - snap_follows = in->cap_snaps.begin()->first; - - m->add_cap(p->first.ino, - cap->cap_id, - path.get_ino(), path.get_path(), // ino - in->caps_wanted(), // wanted - cap->issued, // issued - in->snaprealm->ino, - snap_follows, - flockbl); - - if (did_snaprealm.count(in->snaprealm->ino) == 0) { - ldout(cct, 10) << " snaprealm " << *in->snaprealm << dendl; - m->add_snaprealm(in->snaprealm->ino, in->snaprealm->seq, in->snaprealm->parent); - did_snaprealm.insert(in->snaprealm->ino); - } - } - } - - early_kick_flushing_caps(session); - - session->con->send_message(m); - - mount_cond.Signal(); -} - - -void Client::kick_requests(MetaSession *session) -{ - ldout(cct, 10) << "kick_requests for mds." << session->mds_num << dendl; - for (map::iterator p = mds_requests.begin(); - p != mds_requests.end(); - ++p) { - MetaRequest *req = p->second; - if (req->got_unsafe) - continue; - if (req->aborted()) { - if (req->caller_cond) { - req->kick = true; - req->caller_cond->Signal(); - } - continue; - } - if (req->retry_attempt > 0) - continue; // new requests only - if (req->mds == session->mds_num) { - send_request(p->second, session); - } - } -} - -void Client::resend_unsafe_requests(MetaSession *session) -{ - for (xlist::iterator iter = session->unsafe_requests.begin(); - !iter.end(); - ++iter) - send_request(*iter, session); - - // also re-send old requests when MDS enters reconnect stage. So that MDS can - // process completed requests in clientreplay stage. - for (map::iterator p = mds_requests.begin(); - p != mds_requests.end(); - ++p) { - MetaRequest *req = p->second; - if (req->got_unsafe) - continue; - if (req->aborted()) - continue; - if (req->retry_attempt == 0) - continue; // old requests only - if (req->mds == session->mds_num) - send_request(req, session, true); - } -} - -void Client::wait_unsafe_requests() -{ - list last_unsafe_reqs; - for (map::iterator p = mds_sessions.begin(); - p != mds_sessions.end(); - ++p) { - MetaSession *s = p->second; - if (!s->unsafe_requests.empty()) { - MetaRequest *req = s->unsafe_requests.back(); - req->get(); - last_unsafe_reqs.push_back(req); - } - } - - for (list::iterator p = last_unsafe_reqs.begin(); - p != last_unsafe_reqs.end(); - ++p) { - MetaRequest *req = *p; - if (req->unsafe_item.is_on_list()) - wait_on_list(req->waitfor_safe); - put_request(req); - } -} - -void Client::kick_requests_closed(MetaSession *session) -{ - ldout(cct, 10) << "kick_requests_closed for mds." << session->mds_num << dendl; - for (map::iterator p = mds_requests.begin(); - p != mds_requests.end(); ) { - MetaRequest *req = p->second; - ++p; - if (req->mds == session->mds_num) { - if (req->caller_cond) { - req->kick = true; - req->caller_cond->Signal(); - } - req->item.remove_myself(); - if (req->got_unsafe) { - lderr(cct) << "kick_requests_closed removing unsafe request " << req->get_tid() << dendl; - req->unsafe_item.remove_myself(); - req->unsafe_dir_item.remove_myself(); - req->unsafe_target_item.remove_myself(); - signal_cond_list(req->waitfor_safe); - unregister_request(req); - } - } - } - assert(session->requests.empty()); - assert(session->unsafe_requests.empty()); -} - - - - -/************ - * leases - */ - -void Client::got_mds_push(MetaSession *s) -{ - s->seq++; - ldout(cct, 10) << " mds." << s->mds_num << " seq now " << s->seq << dendl; - if (s->state == MetaSession::STATE_CLOSING) { - s->con->send_message(new MClientSession(CEPH_SESSION_REQUEST_CLOSE, s->seq)); - } -} - -void Client::handle_lease(MClientLease *m) -{ - ldout(cct, 10) << "handle_lease " << *m << dendl; - - assert(m->get_action() == CEPH_MDS_LEASE_REVOKE); - - mds_rank_t mds = mds_rank_t(m->get_source().num()); - MetaSession *session = _get_mds_session(mds, m->get_connection().get()); - if (!session) { - m->put(); - return; - } - - got_mds_push(session); - - ceph_seq_t seq = m->get_seq(); - - Inode *in; - vinodeno_t vino(m->get_ino(), CEPH_NOSNAP); - if (inode_map.count(vino) == 0) { - ldout(cct, 10) << " don't have vino " << vino << dendl; - goto revoke; - } - in = inode_map[vino]; - - if (m->get_mask() & CEPH_LOCK_DN) { - if (!in->dir || in->dir->dentries.count(m->dname) == 0) { - ldout(cct, 10) << " don't have dir|dentry " << m->get_ino() << "/" << m->dname <dir->dentries[m->dname]; - ldout(cct, 10) << " revoked DN lease on " << dn << dendl; - dn->lease_mds = -1; - } - - revoke: - m->get_connection()->send_message( - new MClientLease( - CEPH_MDS_LEASE_RELEASE, seq, - m->get_mask(), m->get_ino(), m->get_first(), m->get_last(), m->dname)); - m->put(); -} - -void Client::put_inode(Inode *in, int n) -{ - ldout(cct, 10) << "put_inode on " << *in << dendl; - int left = in->_put(n); - if (left == 0) { - // release any caps - remove_all_caps(in); - - ldout(cct, 10) << "put_inode deleting " << *in << dendl; - bool unclean = objectcacher->release_set(&in->oset); - assert(!unclean); - inode_map.erase(in->vino()); - if (use_faked_inos()) - _release_faked_ino(in); - - if (in == root) { - root = 0; - root_ancestor = 0; - while (!root_parents.empty()) - root_parents.erase(root_parents.begin()); - } - - delete in; - } -} - -void Client::close_dir(Dir *dir) -{ - Inode *in = dir->parent_inode; - ldout(cct, 15) << "close_dir dir " << dir << " on " << in << dendl; - assert(dir->is_empty()); - assert(in->dir == dir); - assert(in->dn_set.size() < 2); // dirs can't be hard-linked - if (!in->dn_set.empty()) - in->get_first_parent()->put(); // unpin dentry - - delete in->dir; - in->dir = 0; - put_inode(in); // unpin inode -} - - /** - * Don't call this with in==NULL, use get_or_create for that - * leave dn set to default NULL unless you're trying to add - * a new inode to a pre-created Dentry - */ -Dentry* Client::link(Dir *dir, const string& name, Inode *in, Dentry *dn) -{ - if (!dn) { - // create a new Dentry - dn = new Dentry; - dn->name = name; - - // link to dir - dn->dir = dir; - dir->dentries[dn->name] = dn; - lru.lru_insert_mid(dn); // mid or top? - - ldout(cct, 15) << "link dir " << dir->parent_inode << " '" << name << "' to inode " << in - << " dn " << dn << " (new dn)" << dendl; - } else { - ldout(cct, 15) << "link dir " << dir->parent_inode << " '" << name << "' to inode " << in - << " dn " << dn << " (old dn)" << dendl; - } - - if (in) { // link to inode - dn->inode = in; - if (in->is_dir()) { - if (in->dir) - dn->get(); // dir -> dn pin - if (in->ll_ref) - dn->get(); // ll_ref -> dn pin - } - - assert(in->dn_set.count(dn) == 0); - - // only one parent for directories! - if (in->is_dir() && !in->dn_set.empty()) { - Dentry *olddn = in->get_first_parent(); - assert(olddn->dir != dir || olddn->name != name); - Inode *old_diri = olddn->dir->parent_inode; - old_diri->dir_release_count++; - clear_dir_complete_and_ordered(old_diri, true); - unlink(olddn, true, true); // keep dir, dentry - } - - in->dn_set.insert(dn); - - ldout(cct, 20) << "link inode " << in << " parents now " << in->dn_set << dendl; - } - - return dn; -} - -void Client::unlink(Dentry *dn, bool keepdir, bool keepdentry) -{ - InodeRef in; - in.swap(dn->inode); - ldout(cct, 15) << "unlink dir " << dn->dir->parent_inode << " '" << dn->name << "' dn " << dn - << " inode " << dn->inode << dendl; - - // unlink from inode - if (in) { - if (in->is_dir()) { - if (in->dir) - dn->put(); // dir -> dn pin - if (in->ll_ref) - dn->put(); // ll_ref -> dn pin - } - dn->inode = 0; - assert(in->dn_set.count(dn)); - in->dn_set.erase(dn); - ldout(cct, 20) << "unlink inode " << in << " parents now " << in->dn_set << dendl; - } - - if (keepdentry) { - dn->lease_mds = -1; - } else { - ldout(cct, 15) << "unlink removing '" << dn->name << "' dn " << dn << dendl; - - // unlink from dir - dn->dir->dentries.erase(dn->name); - if (dn->dir->is_empty() && !keepdir) - close_dir(dn->dir); - dn->dir = 0; - - // delete den - lru.lru_remove(dn); - dn->put(); - } -} - -/** - * For asynchronous flushes, check for errors from the IO and - * update the inode if necessary - */ -class C_Client_FlushComplete : public Context { -private: - Client *client; - InodeRef inode; -public: - C_Client_FlushComplete(Client *c, Inode *in) : client(c), inode(in) { } - void finish(int r) override { - assert(client->client_lock.is_locked_by_me()); - if (r != 0) { - client_t const whoami = client->whoami; // For the benefit of ldout prefix - ldout(client->cct, 1) << "I/O error from flush on inode " << inode - << " 0x" << std::hex << inode->ino << std::dec - << ": " << r << "(" << cpp_strerror(r) << ")" << dendl; - inode->set_async_err(r); - } - } -}; - - -/**** - * caps - */ - -void Client::get_cap_ref(Inode *in, int cap) -{ - if ((cap & CEPH_CAP_FILE_BUFFER) && - in->cap_refs[CEPH_CAP_FILE_BUFFER] == 0) { - ldout(cct, 5) << "get_cap_ref got first FILE_BUFFER ref on " << *in << dendl; - in->get(); - } - if ((cap & CEPH_CAP_FILE_CACHE) && - in->cap_refs[CEPH_CAP_FILE_CACHE] == 0) { - ldout(cct, 5) << "get_cap_ref got first FILE_CACHE ref on " << *in << dendl; - in->get(); - } - in->get_cap_ref(cap); -} - -void Client::put_cap_ref(Inode *in, int cap) -{ - int last = in->put_cap_ref(cap); - if (last) { - int put_nref = 0; - int drop = last & ~in->caps_issued(); - if (in->snapid == CEPH_NOSNAP) { - if ((last & CEPH_CAP_FILE_WR) && - !in->cap_snaps.empty() && - in->cap_snaps.rbegin()->second.writing) { - ldout(cct, 10) << "put_cap_ref finishing pending cap_snap on " << *in << dendl; - in->cap_snaps.rbegin()->second.writing = 0; - finish_cap_snap(in, in->cap_snaps.rbegin()->second, get_caps_used(in)); - signal_cond_list(in->waitfor_caps); // wake up blocked sync writers - } - if (last & CEPH_CAP_FILE_BUFFER) { - for (auto &p : in->cap_snaps) - p.second.dirty_data = 0; - signal_cond_list(in->waitfor_commit); - ldout(cct, 5) << "put_cap_ref dropped last FILE_BUFFER ref on " << *in << dendl; - ++put_nref; - } - } - if (last & CEPH_CAP_FILE_CACHE) { - ldout(cct, 5) << "put_cap_ref dropped last FILE_CACHE ref on " << *in << dendl; - ++put_nref; - } - if (drop) - check_caps(in, 0); - if (put_nref) - put_inode(in, put_nref); - } -} - -int Client::get_caps(Inode *in, int need, int want, int *phave, loff_t endoff) -{ - int r = check_pool_perm(in, need); - if (r < 0) - return r; - - while (1) { - int file_wanted = in->caps_file_wanted(); - if ((file_wanted & need) != need) { - ldout(cct, 10) << "get_caps " << *in << " need " << ccap_string(need) - << " file_wanted " << ccap_string(file_wanted) << ", EBADF " - << dendl; - return -EBADF; - } - - int implemented; - int have = in->caps_issued(&implemented); - - bool waitfor_caps = false; - bool waitfor_commit = false; - - if (have & need & CEPH_CAP_FILE_WR) { - if (endoff > 0 && - (endoff >= (loff_t)in->max_size || - endoff > (loff_t)(in->size << 1)) && - endoff > (loff_t)in->wanted_max_size) { - ldout(cct, 10) << "wanted_max_size " << in->wanted_max_size << " -> " << endoff << dendl; - in->wanted_max_size = endoff; - check_caps(in, 0); - } - - if (endoff >= 0 && endoff > (loff_t)in->max_size) { - ldout(cct, 10) << "waiting on max_size, endoff " << endoff << " max_size " << in->max_size << " on " << *in << dendl; - waitfor_caps = true; - } - if (!in->cap_snaps.empty()) { - if (in->cap_snaps.rbegin()->second.writing) { - ldout(cct, 10) << "waiting on cap_snap write to complete" << dendl; - waitfor_caps = true; - } - for (auto &p : in->cap_snaps) { - if (p.second.dirty_data) { - waitfor_commit = true; - break; - } - } - if (waitfor_commit) { - _flush(in, new C_Client_FlushComplete(this, in)); - ldout(cct, 10) << "waiting for WRBUFFER to get dropped" << dendl; - } - } - } - - if (!waitfor_caps && !waitfor_commit) { - if ((have & need) == need) { - int revoking = implemented & ~have; - ldout(cct, 10) << "get_caps " << *in << " have " << ccap_string(have) - << " need " << ccap_string(need) << " want " << ccap_string(want) - << " revoking " << ccap_string(revoking) - << dendl; - if ((revoking & want) == 0) { - *phave = need | (have & want); - in->get_cap_ref(need); - return 0; - } - } - ldout(cct, 10) << "waiting for caps " << *in << " need " << ccap_string(need) << " want " << ccap_string(want) << dendl; - waitfor_caps = true; - } - - if ((need & CEPH_CAP_FILE_WR) && in->auth_cap && - in->auth_cap->session->readonly) - return -EROFS; - - if (in->flags & I_CAP_DROPPED) { - int mds_wanted = in->caps_mds_wanted(); - if ((mds_wanted & need) != need) { - int ret = _renew_caps(in); - if (ret < 0) - return ret; - continue; - } - if ((mds_wanted & file_wanted) == - (file_wanted & (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR))) { - in->flags &= ~I_CAP_DROPPED; - } - } - - if (waitfor_caps) - wait_on_list(in->waitfor_caps); - else if (waitfor_commit) - wait_on_list(in->waitfor_commit); - } -} - -int Client::get_caps_used(Inode *in) -{ - unsigned used = in->caps_used(); - if (!(used & CEPH_CAP_FILE_CACHE) && - !objectcacher->set_is_empty(&in->oset)) - used |= CEPH_CAP_FILE_CACHE; - return used; -} - -void Client::cap_delay_requeue(Inode *in) -{ - ldout(cct, 10) << "cap_delay_requeue on " << *in << dendl; - in->hold_caps_until = ceph_clock_now(); - in->hold_caps_until += cct->_conf->client_caps_release_delay; - delayed_caps.push_back(&in->cap_item); -} - -void Client::send_cap(Inode *in, MetaSession *session, Cap *cap, - bool sync, int used, int want, int retain, - int flush, ceph_tid_t flush_tid) -{ - int held = cap->issued | cap->implemented; - int revoking = cap->implemented & ~cap->issued; - retain &= ~revoking; - int dropping = cap->issued & ~retain; - int op = CEPH_CAP_OP_UPDATE; - - ldout(cct, 10) << "send_cap " << *in - << " mds." << session->mds_num << " seq " << cap->seq - << (sync ? " sync " : " async ") - << " used " << ccap_string(used) - << " want " << ccap_string(want) - << " flush " << ccap_string(flush) - << " retain " << ccap_string(retain) - << " held "<< ccap_string(held) - << " revoking " << ccap_string(revoking) - << " dropping " << ccap_string(dropping) - << dendl; - - if (cct->_conf->client_inject_release_failure && revoking) { - const int would_have_issued = cap->issued & retain; - const int would_have_implemented = cap->implemented & (cap->issued | used); - // Simulated bug: - // - tell the server we think issued is whatever they issued plus whatever we implemented - // - leave what we have implemented in place - ldout(cct, 20) << __func__ << " injecting failure to release caps" << dendl; - cap->issued = cap->issued | cap->implemented; - - // Make an exception for revoking xattr caps: we are injecting - // failure to release other caps, but allow xattr because client - // will block on xattr ops if it can't release these to MDS (#9800) - const int xattr_mask = CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; - cap->issued ^= xattr_mask & revoking; - cap->implemented ^= xattr_mask & revoking; - - ldout(cct, 20) << __func__ << " issued " << ccap_string(cap->issued) << " vs " << ccap_string(would_have_issued) << dendl; - ldout(cct, 20) << __func__ << " implemented " << ccap_string(cap->implemented) << " vs " << ccap_string(would_have_implemented) << dendl; - } else { - // Normal behaviour - cap->issued &= retain; - cap->implemented &= cap->issued | used; - } - - snapid_t follows = 0; - - if (flush) - follows = in->snaprealm->get_snap_context().seq; - - MClientCaps *m = new MClientCaps(op, - in->ino, - 0, - cap->cap_id, cap->seq, - cap->implemented, - want, - flush, - cap->mseq, - cap_epoch_barrier); - m->caller_uid = in->cap_dirtier_uid; - m->caller_gid = in->cap_dirtier_gid; - - m->head.issue_seq = cap->issue_seq; - m->set_tid(flush_tid); - - m->head.uid = in->uid; - m->head.gid = in->gid; - m->head.mode = in->mode; - - m->head.nlink = in->nlink; - - if (flush & CEPH_CAP_XATTR_EXCL) { - ::encode(in->xattrs, m->xattrbl); - m->head.xattr_version = in->xattr_version; - } - - m->size = in->size; - m->max_size = in->max_size; - m->truncate_seq = in->truncate_seq; - m->truncate_size = in->truncate_size; - m->mtime = in->mtime; - m->atime = in->atime; - m->ctime = in->ctime; - m->btime = in->btime; - m->time_warp_seq = in->time_warp_seq; - m->change_attr = in->change_attr; - if (sync) - m->flags |= CLIENT_CAPS_SYNC; - - if (flush & CEPH_CAP_FILE_WR) { - m->inline_version = in->inline_version; - m->inline_data = in->inline_data; - } - - in->reported_size = in->size; - m->set_snap_follows(follows); - cap->wanted = want; - if (cap == in->auth_cap) { - m->set_max_size(in->wanted_max_size); - in->requested_max_size = in->wanted_max_size; - ldout(cct, 15) << "auth cap, setting max_size = " << in->requested_max_size << dendl; - } - - if (!session->flushing_caps_tids.empty()) - m->set_oldest_flush_tid(*session->flushing_caps_tids.begin()); - - session->con->send_message(m); -} - -static bool is_max_size_approaching(Inode *in) -{ - /* mds will adjust max size according to the reported size */ - if (in->flushing_caps & CEPH_CAP_FILE_WR) - return false; - if (in->size >= in->max_size) - return true; - /* half of previous max_size increment has been used */ - if (in->max_size > in->reported_size && - (in->size << 1) >= in->max_size + in->reported_size) - return true; - return false; -} - -/** - * check_caps - * - * Examine currently used and wanted versus held caps. Release, flush or ack - * revoked caps to the MDS as appropriate. - * - * @param in the inode to check - * @param flags flags to apply to cap check - */ -void Client::check_caps(Inode *in, unsigned flags) -{ - unsigned wanted = in->caps_wanted(); - unsigned used = get_caps_used(in); - unsigned cap_used; - - if (in->is_dir() && (in->flags & I_COMPLETE)) { - // we do this here because we don't want to drop to Fs (and then - // drop the Fs if we do a create!) if that alone makes us send lookups - // to the MDS. Doing it in in->caps_wanted() has knock-on effects elsewhere - wanted |= CEPH_CAP_FILE_EXCL; - } - - int implemented; - int issued = in->caps_issued(&implemented); - int revoking = implemented & ~issued; - - int retain = wanted | used | CEPH_CAP_PIN; - if (!unmounting) { - if (wanted) - retain |= CEPH_CAP_ANY; - else - retain |= CEPH_CAP_ANY_SHARED; - } - - ldout(cct, 10) << "check_caps on " << *in - << " wanted " << ccap_string(wanted) - << " used " << ccap_string(used) - << " issued " << ccap_string(issued) - << " revoking " << ccap_string(revoking) - << " flags=" << flags - << dendl; - - if (in->snapid != CEPH_NOSNAP) - return; //snap caps last forever, can't write - - if (in->caps.empty()) - return; // guard if at end of func - - if ((revoking & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) && - (used & CEPH_CAP_FILE_CACHE) && !(used & CEPH_CAP_FILE_BUFFER)) - _release(in); - - if (!in->cap_snaps.empty()) - flush_snaps(in); - - if (flags & CHECK_CAPS_NODELAY) - in->hold_caps_until = utime_t(); - else - cap_delay_requeue(in); - - utime_t now = ceph_clock_now(); - - map::iterator it = in->caps.begin(); - while (it != in->caps.end()) { - mds_rank_t mds = it->first; - Cap *cap = it->second; - ++it; - - MetaSession *session = mds_sessions[mds]; - assert(session); - - cap_used = used; - if (in->auth_cap && cap != in->auth_cap) - cap_used &= ~in->auth_cap->issued; - - revoking = cap->implemented & ~cap->issued; - - ldout(cct, 10) << " cap mds." << mds - << " issued " << ccap_string(cap->issued) - << " implemented " << ccap_string(cap->implemented) - << " revoking " << ccap_string(revoking) << dendl; - - if (in->wanted_max_size > in->max_size && - in->wanted_max_size > in->requested_max_size && - cap == in->auth_cap) - goto ack; - - /* approaching file_max? */ - if ((cap->issued & CEPH_CAP_FILE_WR) && - cap == in->auth_cap && - is_max_size_approaching(in)) { - ldout(cct, 10) << "size " << in->size << " approaching max_size " << in->max_size - << ", reported " << in->reported_size << dendl; - goto ack; - } - - /* completed revocation? */ - if (revoking && (revoking & cap_used) == 0) { - ldout(cct, 10) << "completed revocation of " << ccap_string(cap->implemented & ~cap->issued) << dendl; - goto ack; - } - - /* want more caps from mds? */ - if (wanted & ~(cap->wanted | cap->issued)) - goto ack; - - if (!revoking && unmounting && (cap_used == 0)) - goto ack; - - if (wanted == cap->wanted && // mds knows what we want. - ((cap->issued & ~retain) == 0) &&// and we don't have anything we wouldn't like - !in->dirty_caps) // and we have no dirty caps - continue; - - if (now < in->hold_caps_until) { - ldout(cct, 10) << "delaying cap release" << dendl; - continue; - } - - ack: - // re-send old cap/snapcap flushes first. - if (session->mds_state >= MDSMap::STATE_RECONNECT && - session->mds_state < MDSMap::STATE_ACTIVE && - session->early_flushing_caps.count(in) == 0) { - ldout(cct, 20) << " reflushing caps (check_caps) on " << *in - << " to mds." << session->mds_num << dendl; - session->early_flushing_caps.insert(in); - if (in->cap_snaps.size()) - flush_snaps(in, true); - if (in->flushing_caps) - flush_caps(in, session, flags & CHECK_CAPS_SYNCHRONOUS); - } - - int flushing; - ceph_tid_t flush_tid; - if (in->auth_cap == cap && in->dirty_caps) { - flushing = mark_caps_flushing(in, &flush_tid); - } else { - flushing = 0; - flush_tid = 0; - } - - send_cap(in, session, cap, flags & CHECK_CAPS_SYNCHRONOUS, cap_used, wanted, - retain, flushing, flush_tid); - } -} - - -void Client::queue_cap_snap(Inode *in, SnapContext& old_snapc) -{ - int used = get_caps_used(in); - int dirty = in->caps_dirty(); - ldout(cct, 10) << "queue_cap_snap " << *in << " snapc " << old_snapc << " used " << ccap_string(used) << dendl; - - if (in->cap_snaps.size() && - in->cap_snaps.rbegin()->second.writing) { - ldout(cct, 10) << "queue_cap_snap already have pending cap_snap on " << *in << dendl; - return; - } else if (in->caps_dirty() || - (used & CEPH_CAP_FILE_WR) || - (dirty & CEPH_CAP_ANY_WR)) { - const auto &capsnapem = in->cap_snaps.emplace(std::piecewise_construct, std::make_tuple(old_snapc.seq), std::make_tuple(in)); - assert(capsnapem.second == true); /* element inserted */ - CapSnap &capsnap = capsnapem.first->second; - capsnap.context = old_snapc; - capsnap.issued = in->caps_issued(); - capsnap.dirty = in->caps_dirty(); - - capsnap.dirty_data = (used & CEPH_CAP_FILE_BUFFER); - - capsnap.uid = in->uid; - capsnap.gid = in->gid; - capsnap.mode = in->mode; - capsnap.btime = in->btime; - capsnap.xattrs = in->xattrs; - capsnap.xattr_version = in->xattr_version; - - if (used & CEPH_CAP_FILE_WR) { - ldout(cct, 10) << "queue_cap_snap WR used on " << *in << dendl; - capsnap.writing = 1; - } else { - finish_cap_snap(in, capsnap, used); - } - } else { - ldout(cct, 10) << "queue_cap_snap not dirty|writing on " << *in << dendl; - } -} - -void Client::finish_cap_snap(Inode *in, CapSnap &capsnap, int used) -{ - ldout(cct, 10) << "finish_cap_snap " << *in << " capsnap " << (void *)&capsnap << " used " << ccap_string(used) << dendl; - capsnap.size = in->size; - capsnap.mtime = in->mtime; - capsnap.atime = in->atime; - capsnap.ctime = in->ctime; - capsnap.time_warp_seq = in->time_warp_seq; - capsnap.change_attr = in->change_attr; - - capsnap.dirty |= in->caps_dirty(); - - if (capsnap.dirty & CEPH_CAP_FILE_WR) { - capsnap.inline_data = in->inline_data; - capsnap.inline_version = in->inline_version; - } - - if (used & CEPH_CAP_FILE_BUFFER) { - ldout(cct, 10) << "finish_cap_snap " << *in << " cap_snap " << &capsnap << " used " << used - << " WRBUFFER, delaying" << dendl; - } else { - capsnap.dirty_data = 0; - flush_snaps(in); - } -} - -void Client::_flushed_cap_snap(Inode *in, snapid_t seq) -{ - ldout(cct, 10) << "_flushed_cap_snap seq " << seq << " on " << *in << dendl; - in->cap_snaps.at(seq).dirty_data = 0; - flush_snaps(in); -} - -void Client::flush_snaps(Inode *in, bool all_again) -{ - ldout(cct, 10) << "flush_snaps on " << *in << " all_again " << all_again << dendl; - assert(in->cap_snaps.size()); - - // pick auth mds - assert(in->auth_cap); - MetaSession *session = in->auth_cap->session; - int mseq = in->auth_cap->mseq; - - for (auto &p : in->cap_snaps) { - CapSnap &capsnap = p.second; - if (!all_again) { - // only flush once per session - if (capsnap.flush_tid > 0) - continue; - } - - ldout(cct, 10) << "flush_snaps mds." << session->mds_num - << " follows " << p.first - << " size " << capsnap.size - << " mtime " << capsnap.mtime - << " dirty_data=" << capsnap.dirty_data - << " writing=" << capsnap.writing - << " on " << *in << dendl; - if (capsnap.dirty_data || capsnap.writing) - continue; - - if (capsnap.flush_tid == 0) { - capsnap.flush_tid = ++last_flush_tid; - if (!in->flushing_cap_item.is_on_list()) - session->flushing_caps.push_back(&in->flushing_cap_item); - session->flushing_caps_tids.insert(capsnap.flush_tid); - } - - MClientCaps *m = new MClientCaps(CEPH_CAP_OP_FLUSHSNAP, in->ino, in->snaprealm->ino, 0, mseq, - cap_epoch_barrier); - if (user_id >= 0) - m->caller_uid = user_id; - if (group_id >= 0) - m->caller_gid = group_id; - - m->set_client_tid(capsnap.flush_tid); - m->head.snap_follows = p.first; - - m->head.caps = capsnap.issued; - m->head.dirty = capsnap.dirty; - - m->head.uid = capsnap.uid; - m->head.gid = capsnap.gid; - m->head.mode = capsnap.mode; - m->btime = capsnap.btime; - - m->size = capsnap.size; - - m->head.xattr_version = capsnap.xattr_version; - ::encode(capsnap.xattrs, m->xattrbl); - - m->ctime = capsnap.ctime; - m->btime = capsnap.btime; - m->mtime = capsnap.mtime; - m->atime = capsnap.atime; - m->time_warp_seq = capsnap.time_warp_seq; - m->change_attr = capsnap.change_attr; - - if (capsnap.dirty & CEPH_CAP_FILE_WR) { - m->inline_version = in->inline_version; - m->inline_data = in->inline_data; - } - - assert(!session->flushing_caps_tids.empty()); - m->set_oldest_flush_tid(*session->flushing_caps_tids.begin()); - - session->con->send_message(m); - } -} - - - -void Client::wait_on_list(list& ls) -{ - Cond cond; - ls.push_back(&cond); - cond.Wait(client_lock); - ls.remove(&cond); -} - -void Client::signal_cond_list(list& ls) -{ - for (list::iterator it = ls.begin(); it != ls.end(); ++it) - (*it)->Signal(); -} - -void Client::wait_on_context_list(list& ls) -{ - Cond cond; - bool done = false; - int r; - ls.push_back(new C_Cond(&cond, &done, &r)); - while (!done) - cond.Wait(client_lock); -} - -void Client::signal_context_list(list& ls) -{ - while (!ls.empty()) { - ls.front()->complete(0); - ls.pop_front(); - } -} - -void Client::wake_inode_waiters(MetaSession *s) -{ - xlist::iterator iter = s->caps.begin(); - while (!iter.end()){ - signal_cond_list((*iter)->inode->waitfor_caps); - ++iter; - } -} - - -// flush dirty data (from objectcache) - -class C_Client_CacheInvalidate : public Context { -private: - Client *client; - vinodeno_t ino; - int64_t offset, length; -public: - C_Client_CacheInvalidate(Client *c, Inode *in, int64_t off, int64_t len) : - client(c), offset(off), length(len) { - if (client->use_faked_inos()) - ino = vinodeno_t(in->faked_ino, CEPH_NOSNAP); - else - ino = in->vino(); - } - void finish(int r) override { - // _async_invalidate takes the lock when it needs to, call this back from outside of lock. - assert(!client->client_lock.is_locked_by_me()); - client->_async_invalidate(ino, offset, length); - } -}; - -void Client::_async_invalidate(vinodeno_t ino, int64_t off, int64_t len) -{ - if (unmounting) - return; - ldout(cct, 10) << "_async_invalidate " << ino << " " << off << "~" << len << dendl; - ino_invalidate_cb(callback_handle, ino, off, len); -} - -void Client::_schedule_invalidate_callback(Inode *in, int64_t off, int64_t len) { - - if (ino_invalidate_cb) - // we queue the invalidate, which calls the callback and decrements the ref - async_ino_invalidator.queue(new C_Client_CacheInvalidate(this, in, off, len)); -} - -void Client::_invalidate_inode_cache(Inode *in) -{ - ldout(cct, 10) << "_invalidate_inode_cache " << *in << dendl; - - // invalidate our userspace inode cache - if (cct->_conf->client_oc) - objectcacher->release_set(&in->oset); - - _schedule_invalidate_callback(in, 0, 0); -} - -void Client::_invalidate_inode_cache(Inode *in, int64_t off, int64_t len) -{ - ldout(cct, 10) << "_invalidate_inode_cache " << *in << " " << off << "~" << len << dendl; - - // invalidate our userspace inode cache - if (cct->_conf->client_oc) { - vector ls; - Striper::file_to_extents(cct, in->ino, &in->layout, off, len, in->truncate_size, ls); - objectcacher->discard_set(&in->oset, ls); - } - - _schedule_invalidate_callback(in, off, len); -} - -bool Client::_release(Inode *in) -{ - ldout(cct, 20) << "_release " << *in << dendl; - if (in->cap_refs[CEPH_CAP_FILE_CACHE] == 0) { - _invalidate_inode_cache(in); - return true; - } - return false; -} - -bool Client::_flush(Inode *in, Context *onfinish) -{ - ldout(cct, 10) << "_flush " << *in << dendl; - - if (!in->oset.dirty_or_tx) { - ldout(cct, 10) << " nothing to flush" << dendl; - onfinish->complete(0); - return true; - } - - if (objecter->osdmap_pool_full(in->layout.pool_id)) { - ldout(cct, 1) << __func__ << ": FULL, purging for ENOSPC" << dendl; - objectcacher->purge_set(&in->oset); - if (onfinish) { - onfinish->complete(-ENOSPC); - } - return true; - } - - return objectcacher->flush_set(&in->oset, onfinish); -} - -void Client::_flush_range(Inode *in, int64_t offset, uint64_t size) -{ - assert(client_lock.is_locked()); - if (!in->oset.dirty_or_tx) { - ldout(cct, 10) << " nothing to flush" << dendl; - return; - } - - Mutex flock("Client::_flush_range flock"); - Cond cond; - bool safe = false; - Context *onflush = new C_SafeCond(&flock, &cond, &safe); - bool ret = objectcacher->file_flush(&in->oset, &in->layout, in->snaprealm->get_snap_context(), - offset, size, onflush); - if (!ret) { - // wait for flush - client_lock.Unlock(); - flock.Lock(); - while (!safe) - cond.Wait(flock); - flock.Unlock(); - client_lock.Lock(); - } -} - -void Client::flush_set_callback(ObjectCacher::ObjectSet *oset) -{ - // Mutex::Locker l(client_lock); - assert(client_lock.is_locked()); // will be called via dispatch() -> objecter -> ... - Inode *in = static_cast(oset->parent); - assert(in); - _flushed(in); -} - -void Client::_flushed(Inode *in) -{ - ldout(cct, 10) << "_flushed " << *in << dendl; - - put_cap_ref(in, CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_BUFFER); -} - - - -// checks common to add_update_cap, handle_cap_grant -void Client::check_cap_issue(Inode *in, Cap *cap, unsigned issued) -{ - unsigned had = in->caps_issued(); - - if ((issued & CEPH_CAP_FILE_CACHE) && - !(had & CEPH_CAP_FILE_CACHE)) - in->cache_gen++; - - if ((issued & CEPH_CAP_FILE_SHARED) && - !(had & CEPH_CAP_FILE_SHARED)) { - in->shared_gen++; - - if (in->is_dir()) - clear_dir_complete_and_ordered(in, true); - } -} - -void Client::add_update_cap(Inode *in, MetaSession *mds_session, uint64_t cap_id, - unsigned issued, unsigned seq, unsigned mseq, inodeno_t realm, - int flags, const UserPerm& cap_perms) -{ - Cap *cap = 0; - mds_rank_t mds = mds_session->mds_num; - if (in->caps.count(mds)) { - cap = in->caps[mds]; - - /* - * auth mds of the inode changed. we received the cap export - * message, but still haven't received the cap import message. - * handle_cap_export() updated the new auth MDS' cap. - * - * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing - * a message that was send before the cap import message. So - * don't remove caps. - */ - if (ceph_seq_cmp(seq, cap->seq) <= 0) { - assert(cap == in->auth_cap); - assert(cap->cap_id == cap_id); - seq = cap->seq; - mseq = cap->mseq; - issued |= cap->issued; - flags |= CEPH_CAP_FLAG_AUTH; - } - } else { - mds_session->num_caps++; - if (!in->is_any_caps()) { - assert(in->snaprealm == 0); - in->snaprealm = get_snap_realm(realm); - in->snaprealm->inodes_with_caps.push_back(&in->snaprealm_item); - ldout(cct, 15) << "add_update_cap first one, opened snaprealm " << in->snaprealm << dendl; - } - in->caps[mds] = cap = new Cap; - - mds_session->caps.push_back(&cap->cap_item); - cap->session = mds_session; - cap->inode = in; - cap->gen = mds_session->cap_gen; - cap_list.push_back(&in->cap_item); - } - - check_cap_issue(in, cap, issued); - - if (flags & CEPH_CAP_FLAG_AUTH) { - if (in->auth_cap != cap && - (!in->auth_cap || ceph_seq_cmp(in->auth_cap->mseq, mseq) < 0)) { - if (in->auth_cap && in->flushing_cap_item.is_on_list()) { - ldout(cct, 10) << "add_update_cap changing auth cap: " - << "add myself to new auth MDS' flushing caps list" << dendl; - adjust_session_flushing_caps(in, in->auth_cap->session, mds_session); - } - in->auth_cap = cap; - } - } - - unsigned old_caps = cap->issued; - cap->cap_id = cap_id; - cap->issued |= issued; - cap->implemented |= issued; - cap->seq = seq; - cap->issue_seq = seq; - cap->mseq = mseq; - cap->latest_perms = cap_perms; - ldout(cct, 10) << "add_update_cap issued " << ccap_string(old_caps) << " -> " << ccap_string(cap->issued) - << " from mds." << mds - << " on " << *in - << dendl; - - if ((issued & ~old_caps) && in->auth_cap == cap) { - // non-auth MDS is revoking the newly grant caps ? - for (map::iterator it = in->caps.begin(); it != in->caps.end(); ++it) { - if (it->second == cap) - continue; - if (it->second->implemented & ~it->second->issued & issued) { - check_caps(in, CHECK_CAPS_NODELAY); - break; - } - } - } - - if (issued & ~old_caps) - signal_cond_list(in->waitfor_caps); -} - -void Client::remove_cap(Cap *cap, bool queue_release) -{ - Inode *in = cap->inode; - MetaSession *session = cap->session; - mds_rank_t mds = cap->session->mds_num; - - ldout(cct, 10) << "remove_cap mds." << mds << " on " << *in << dendl; - - if (queue_release) { - session->enqueue_cap_release( - in->ino, - cap->cap_id, - cap->issue_seq, - cap->mseq, - cap_epoch_barrier); - } - - if (in->auth_cap == cap) { - if (in->flushing_cap_item.is_on_list()) { - ldout(cct, 10) << " removing myself from flushing_cap list" << dendl; - in->flushing_cap_item.remove_myself(); - } - in->auth_cap = NULL; - } - assert(in->caps.count(mds)); - in->caps.erase(mds); - - cap->cap_item.remove_myself(); - delete cap; - cap = nullptr; - - if (!in->is_any_caps()) { - ldout(cct, 15) << "remove_cap last one, closing snaprealm " << in->snaprealm << dendl; - in->snaprealm_item.remove_myself(); - put_snap_realm(in->snaprealm); - in->snaprealm = 0; - } -} - -void Client::remove_all_caps(Inode *in) -{ - while (!in->caps.empty()) - remove_cap(in->caps.begin()->second, true); -} - -void Client::remove_session_caps(MetaSession *s) -{ - ldout(cct, 10) << "remove_session_caps mds." << s->mds_num << dendl; - - while (s->caps.size()) { - Cap *cap = *s->caps.begin(); - Inode *in = cap->inode; - bool dirty_caps = false, cap_snaps = false; - if (in->auth_cap == cap) { - cap_snaps = !in->cap_snaps.empty(); - dirty_caps = in->dirty_caps | in->flushing_caps; - in->wanted_max_size = 0; - in->requested_max_size = 0; - in->flags |= I_CAP_DROPPED; - } - remove_cap(cap, false); - signal_cond_list(in->waitfor_caps); - if (cap_snaps) { - InodeRef tmp_ref(in); - in->cap_snaps.clear(); - } - if (dirty_caps) { - lderr(cct) << "remove_session_caps still has dirty|flushing caps on " << *in << dendl; - if (in->flushing_caps) { - num_flushing_caps--; - in->flushing_cap_tids.clear(); - } - in->flushing_caps = 0; - in->dirty_caps = 0; - put_inode(in); - } - } - s->flushing_caps_tids.clear(); - sync_cond.Signal(); -} - -class C_Client_Remount : public Context { -private: - Client *client; -public: - explicit C_Client_Remount(Client *c) : client(c) {} - void finish(int r) override { - assert (r == 0); - r = client->remount_cb(client->callback_handle); - if (r != 0) { - client_t whoami = client->get_nodeid(); - lderr(client->cct) << "tried to remount (to trim kernel dentries) and got error " - << r << dendl; - if (client->require_remount && !client->unmounting) { - assert(0 == "failed to remount for kernel dentry trimming"); - } - } - } -}; - -void Client::_invalidate_kernel_dcache() -{ - if (unmounting) - return; - if (can_invalidate_dentries && dentry_invalidate_cb && root->dir) { - for (ceph::unordered_map::iterator p = root->dir->dentries.begin(); - p != root->dir->dentries.end(); - ++p) { - if (p->second->inode) - _schedule_invalidate_dentry_callback(p->second, false); - } - } else if (remount_cb) { - // Hacky: - // when remounting a file system, linux kernel trims all unused dentries in the fs - remount_finisher.queue(new C_Client_Remount(this)); - } -} - -void Client::trim_caps(MetaSession *s, int max) -{ - mds_rank_t mds = s->mds_num; - int caps_size = s->caps.size(); - ldout(cct, 10) << "trim_caps mds." << mds << " max " << max - << " caps " << caps_size << dendl; - - int trimmed = 0; - xlist::iterator p = s->caps.begin(); - while ((caps_size - trimmed) > max && !p.end()) { - Cap *cap = *p; - Inode *in = cap->inode; - - // Increment p early because it will be invalidated if cap - // is deleted inside remove_cap - ++p; - - if (in->caps.size() > 1 && cap != in->auth_cap) { - int mine = cap->issued | cap->implemented; - int oissued = in->auth_cap ? in->auth_cap->issued : 0; - // disposable non-auth cap - if (!(get_caps_used(in) & ~oissued & mine)) { - ldout(cct, 20) << " removing unused, unneeded non-auth cap on " << *in << dendl; - remove_cap(cap, true); - trimmed++; - } - } else { - ldout(cct, 20) << " trying to trim dentries for " << *in << dendl; - bool all = true; - set::iterator q = in->dn_set.begin(); - InodeRef tmp_ref(in); - while (q != in->dn_set.end()) { - Dentry *dn = *q++; - if (dn->lru_is_expireable()) { - if (can_invalidate_dentries && - dn->dir->parent_inode->ino == MDS_INO_ROOT) { - // Only issue one of these per DN for inodes in root: handle - // others more efficiently by calling for root-child DNs at - // the end of this function. - _schedule_invalidate_dentry_callback(dn, true); - } - trim_dentry(dn); - } else { - ldout(cct, 20) << " not expirable: " << dn->name << dendl; - all = false; - } - } - if (all && in->ino != MDS_INO_ROOT) { - ldout(cct, 20) << __func__ << " counting as trimmed: " << *in << dendl; - trimmed++; - } - } - } - - if (s->caps.size() > max) - _invalidate_kernel_dcache(); -} - -void Client::force_session_readonly(MetaSession *s) -{ - s->readonly = true; - for (xlist::iterator p = s->caps.begin(); !p.end(); ++p) { - Inode *in = (*p)->inode; - if (in->caps_wanted() & CEPH_CAP_FILE_WR) - signal_cond_list(in->waitfor_caps); - } -} - -void Client::mark_caps_dirty(Inode *in, int caps) -{ - ldout(cct, 10) << "mark_caps_dirty " << *in << " " << ccap_string(in->dirty_caps) << " -> " - << ccap_string(in->dirty_caps | caps) << dendl; - if (caps && !in->caps_dirty()) - in->get(); - in->dirty_caps |= caps; -} - -int Client::mark_caps_flushing(Inode *in, ceph_tid_t* ptid) -{ - MetaSession *session = in->auth_cap->session; - - int flushing = in->dirty_caps; - assert(flushing); - - ceph_tid_t flush_tid = ++last_flush_tid; - in->flushing_cap_tids[flush_tid] = flushing; - - if (!in->flushing_caps) { - ldout(cct, 10) << "mark_caps_flushing " << ccap_string(flushing) << " " << *in << dendl; - num_flushing_caps++; - } else { - ldout(cct, 10) << "mark_caps_flushing (more) " << ccap_string(flushing) << " " << *in << dendl; - } - - in->flushing_caps |= flushing; - in->dirty_caps = 0; - - if (!in->flushing_cap_item.is_on_list()) - session->flushing_caps.push_back(&in->flushing_cap_item); - session->flushing_caps_tids.insert(flush_tid); - - *ptid = flush_tid; - return flushing; -} - -void Client::adjust_session_flushing_caps(Inode *in, MetaSession *old_s, MetaSession *new_s) -{ - for (auto &p : in->cap_snaps) { - CapSnap &capsnap = p.second; - if (capsnap.flush_tid > 0) { - old_s->flushing_caps_tids.erase(capsnap.flush_tid); - new_s->flushing_caps_tids.insert(capsnap.flush_tid); - } - } - for (map::iterator it = in->flushing_cap_tids.begin(); - it != in->flushing_cap_tids.end(); - ++it) { - old_s->flushing_caps_tids.erase(it->first); - new_s->flushing_caps_tids.insert(it->first); - } - new_s->flushing_caps.push_back(&in->flushing_cap_item); -} - -/* - * Flush all caps back to the MDS. Because the callers generally wait on the - * result of this function (syncfs and umount cases), we set - * CHECK_CAPS_SYNCHRONOUS on the last check_caps call. - */ -void Client::flush_caps_sync() -{ - ldout(cct, 10) << __func__ << dendl; - xlist::iterator p = delayed_caps.begin(); - while (!p.end()) { - unsigned flags = CHECK_CAPS_NODELAY; - Inode *in = *p; - - ++p; - delayed_caps.pop_front(); - if (p.end() && cap_list.empty()) - flags |= CHECK_CAPS_SYNCHRONOUS; - check_caps(in, flags); - } - - // other caps, too - p = cap_list.begin(); - while (!p.end()) { - unsigned flags = CHECK_CAPS_NODELAY; - Inode *in = *p; - - ++p; - if (p.end()) - flags |= CHECK_CAPS_SYNCHRONOUS; - check_caps(in, flags); - } -} - -void Client::flush_caps(Inode *in, MetaSession *session, bool sync) -{ - ldout(cct, 10) << "flush_caps " << in << " mds." << session->mds_num << dendl; - Cap *cap = in->auth_cap; - assert(cap->session == session); - - for (map::iterator p = in->flushing_cap_tids.begin(); - p != in->flushing_cap_tids.end(); - ++p) { - bool req_sync = false; - - /* If this is a synchronous request, then flush the journal on last one */ - if (sync && (p->first == in->flushing_cap_tids.rbegin()->first)) - req_sync = true; - - send_cap(in, session, cap, req_sync, - (get_caps_used(in) | in->caps_dirty()), - in->caps_wanted(), (cap->issued | cap->implemented), - p->second, p->first); - } -} - -void Client::wait_sync_caps(Inode *in, ceph_tid_t want) -{ - while (in->flushing_caps) { - map::iterator it = in->flushing_cap_tids.begin(); - assert(it != in->flushing_cap_tids.end()); - if (it->first > want) - break; - ldout(cct, 10) << "wait_sync_caps on " << *in << " flushing " - << ccap_string(it->second) << " want " << want - << " last " << it->first << dendl; - wait_on_list(in->waitfor_caps); - } -} - -void Client::wait_sync_caps(ceph_tid_t want) -{ - retry: - ldout(cct, 10) << "wait_sync_caps want " << want << " (last is " << last_flush_tid << ", " - << num_flushing_caps << " total flushing)" << dendl; - for (map::iterator p = mds_sessions.begin(); - p != mds_sessions.end(); - ++p) { - MetaSession *s = p->second; - if (s->flushing_caps_tids.empty()) - continue; - ceph_tid_t oldest_tid = *s->flushing_caps_tids.begin(); - if (oldest_tid <= want) { - ldout(cct, 10) << " waiting on mds." << p->first << " tid " << oldest_tid - << " (want " << want << ")" << dendl; - sync_cond.Wait(client_lock); - goto retry; - } - } -} - -void Client::kick_flushing_caps(MetaSession *session) -{ - mds_rank_t mds = session->mds_num; - ldout(cct, 10) << "kick_flushing_caps mds." << mds << dendl; - - for (xlist::iterator p = session->flushing_caps.begin(); !p.end(); ++p) { - Inode *in = *p; - if (session->early_flushing_caps.count(in)) - continue; - ldout(cct, 20) << " reflushing caps on " << *in << " to mds." << mds << dendl; - if (in->cap_snaps.size()) - flush_snaps(in, true); - if (in->flushing_caps) - flush_caps(in, session); - } - - session->early_flushing_caps.clear(); -} - -void Client::early_kick_flushing_caps(MetaSession *session) -{ - session->early_flushing_caps.clear(); - - for (xlist::iterator p = session->flushing_caps.begin(); !p.end(); ++p) { - Inode *in = *p; - assert(in->auth_cap); - - // if flushing caps were revoked, we re-send the cap flush in client reconnect - // stage. This guarantees that MDS processes the cap flush message before issuing - // the flushing caps to other client. - if ((in->flushing_caps & in->auth_cap->issued) == in->flushing_caps) - continue; - - ldout(cct, 20) << " reflushing caps (early_kick) on " << *in - << " to mds." << session->mds_num << dendl; - - session->early_flushing_caps.insert(in); - - if (in->cap_snaps.size()) - flush_snaps(in, true); - if (in->flushing_caps) - flush_caps(in, session); - - } -} - -void Client::kick_maxsize_requests(MetaSession *session) -{ - xlist::iterator iter = session->caps.begin(); - while (!iter.end()){ - (*iter)->inode->requested_max_size = 0; - (*iter)->inode->wanted_max_size = 0; - signal_cond_list((*iter)->inode->waitfor_caps); - ++iter; - } -} - -void SnapRealm::build_snap_context() -{ - set snaps; - snapid_t max_seq = seq; - - // start with prior_parents? - for (unsigned i=0; iget_snap_context(); - for (unsigned i=0; i= parent_since) - snaps.insert(psnapc.snaps[i]); - if (psnapc.seq > max_seq) - max_seq = psnapc.seq; - } - - // my snaps - for (unsigned i=0; i::reverse_iterator p = snaps.rbegin(); p != snaps.rend(); ++p) - cached_snap_context.snaps.push_back(*p); -} - -void Client::invalidate_snaprealm_and_children(SnapRealm *realm) -{ - list q; - q.push_back(realm); - - while (!q.empty()) { - realm = q.front(); - q.pop_front(); - - ldout(cct, 10) << "invalidate_snaprealm_and_children " << *realm << dendl; - realm->invalidate_cache(); - - for (set::iterator p = realm->pchildren.begin(); - p != realm->pchildren.end(); - ++p) - q.push_back(*p); - } -} - -SnapRealm *Client::get_snap_realm(inodeno_t r) -{ - SnapRealm *realm = snap_realms[r]; - if (!realm) - snap_realms[r] = realm = new SnapRealm(r); - ldout(cct, 20) << "get_snap_realm " << r << " " << realm << " " << realm->nref << " -> " << (realm->nref + 1) << dendl; - realm->nref++; - return realm; -} - -SnapRealm *Client::get_snap_realm_maybe(inodeno_t r) -{ - if (snap_realms.count(r) == 0) { - ldout(cct, 20) << "get_snap_realm_maybe " << r << " fail" << dendl; - return NULL; - } - SnapRealm *realm = snap_realms[r]; - ldout(cct, 20) << "get_snap_realm_maybe " << r << " " << realm << " " << realm->nref << " -> " << (realm->nref + 1) << dendl; - realm->nref++; - return realm; -} - -void Client::put_snap_realm(SnapRealm *realm) -{ - ldout(cct, 20) << "put_snap_realm " << realm->ino << " " << realm - << " " << realm->nref << " -> " << (realm->nref - 1) << dendl; - if (--realm->nref == 0) { - snap_realms.erase(realm->ino); - if (realm->pparent) { - realm->pparent->pchildren.erase(realm); - put_snap_realm(realm->pparent); - } - delete realm; - } -} - -bool Client::adjust_realm_parent(SnapRealm *realm, inodeno_t parent) -{ - if (realm->parent != parent) { - ldout(cct, 10) << "adjust_realm_parent " << *realm - << " " << realm->parent << " -> " << parent << dendl; - realm->parent = parent; - if (realm->pparent) { - realm->pparent->pchildren.erase(realm); - put_snap_realm(realm->pparent); - } - realm->pparent = get_snap_realm(parent); - realm->pparent->pchildren.insert(realm); - return true; - } - return false; -} - -static bool has_new_snaps(const SnapContext& old_snapc, - const SnapContext& new_snapc) -{ - return !new_snapc.snaps.empty() && new_snapc.snaps[0] > old_snapc.seq; -} - - -void Client::update_snap_trace(bufferlist& bl, SnapRealm **realm_ret, bool flush) -{ - SnapRealm *first_realm = NULL; - ldout(cct, 10) << "update_snap_trace len " << bl.length() << dendl; - - map dirty_realms; - - bufferlist::iterator p = bl.begin(); - while (!p.end()) { - SnapRealmInfo info; - ::decode(info, p); - SnapRealm *realm = get_snap_realm(info.ino()); - - bool invalidate = false; - - if (info.seq() > realm->seq) { - ldout(cct, 10) << "update_snap_trace " << *realm << " seq " << info.seq() << " > " << realm->seq - << dendl; - - if (flush) { - // writeback any dirty caps _before_ updating snap list (i.e. with old snap info) - // flush me + children - list q; - q.push_back(realm); - while (!q.empty()) { - SnapRealm *realm = q.front(); - q.pop_front(); - - for (set::iterator p = realm->pchildren.begin(); - p != realm->pchildren.end(); - ++p) - q.push_back(*p); - - if (dirty_realms.count(realm) == 0) { - realm->nref++; - dirty_realms[realm] = realm->get_snap_context(); - } - } - } - - // update - realm->seq = info.seq(); - realm->created = info.created(); - realm->parent_since = info.parent_since(); - realm->prior_parent_snaps = info.prior_parent_snaps; - realm->my_snaps = info.my_snaps; - invalidate = true; - } - - // _always_ verify parent - if (adjust_realm_parent(realm, info.parent())) - invalidate = true; - - if (invalidate) { - invalidate_snaprealm_and_children(realm); - ldout(cct, 15) << "update_snap_trace " << *realm << " self|parent updated" << dendl; - ldout(cct, 15) << " snapc " << realm->get_snap_context() << dendl; - } else { - ldout(cct, 10) << "update_snap_trace " << *realm << " seq " << info.seq() - << " <= " << realm->seq << " and same parent, SKIPPING" << dendl; - } - - if (!first_realm) - first_realm = realm; - else - put_snap_realm(realm); - } - - for (map::iterator q = dirty_realms.begin(); - q != dirty_realms.end(); - ++q) { - SnapRealm *realm = q->first; - // if there are new snaps ? - if (has_new_snaps(q->second, realm->get_snap_context())) { - ldout(cct, 10) << " flushing caps on " << *realm << dendl; - xlist::iterator r = realm->inodes_with_caps.begin(); - while (!r.end()) { - Inode *in = *r; - ++r; - queue_cap_snap(in, q->second); - } - } else { - ldout(cct, 10) << " no new snap on " << *realm << dendl; - } - put_snap_realm(realm); - } - - if (realm_ret) - *realm_ret = first_realm; - else - put_snap_realm(first_realm); -} - -void Client::handle_snap(MClientSnap *m) -{ - ldout(cct, 10) << "handle_snap " << *m << dendl; - mds_rank_t mds = mds_rank_t(m->get_source().num()); - MetaSession *session = _get_mds_session(mds, m->get_connection().get()); - if (!session) { - m->put(); - return; - } - - got_mds_push(session); - - map to_move; - SnapRealm *realm = 0; - - if (m->head.op == CEPH_SNAP_OP_SPLIT) { - assert(m->head.split); - SnapRealmInfo info; - bufferlist::iterator p = m->bl.begin(); - ::decode(info, p); - assert(info.ino() == m->head.split); - - // flush, then move, ino's. - realm = get_snap_realm(info.ino()); - ldout(cct, 10) << " splitting off " << *realm << dendl; - for (vector::iterator p = m->split_inos.begin(); - p != m->split_inos.end(); - ++p) { - vinodeno_t vino(*p, CEPH_NOSNAP); - if (inode_map.count(vino)) { - Inode *in = inode_map[vino]; - if (!in->snaprealm || in->snaprealm == realm) - continue; - if (in->snaprealm->created > info.created()) { - ldout(cct, 10) << " NOT moving " << *in << " from _newer_ realm " - << *in->snaprealm << dendl; - continue; - } - ldout(cct, 10) << " moving " << *in << " from " << *in->snaprealm << dendl; - - - in->snaprealm_item.remove_myself(); - to_move[in] = in->snaprealm->get_snap_context(); - put_snap_realm(in->snaprealm); - } - } - - // move child snaprealms, too - for (vector::iterator p = m->split_realms.begin(); - p != m->split_realms.end(); - ++p) { - ldout(cct, 10) << "adjusting snaprealm " << *p << " parent" << dendl; - SnapRealm *child = get_snap_realm_maybe(*p); - if (!child) - continue; - adjust_realm_parent(child, realm->ino); - put_snap_realm(child); - } - } - - update_snap_trace(m->bl, NULL, m->head.op != CEPH_SNAP_OP_DESTROY); - - if (realm) { - for (auto p = to_move.begin(); p != to_move.end(); ++p) { - Inode *in = p->first; - in->snaprealm = realm; - realm->inodes_with_caps.push_back(&in->snaprealm_item); - realm->nref++; - // queue for snap writeback - if (has_new_snaps(p->second, realm->get_snap_context())) - queue_cap_snap(in, p->second); - } - put_snap_realm(realm); - } - - m->put(); -} - -void Client::handle_quota(MClientQuota *m) -{ - mds_rank_t mds = mds_rank_t(m->get_source().num()); - MetaSession *session = _get_mds_session(mds, m->get_connection().get()); - if (!session) { - m->put(); - return; - } - - got_mds_push(session); - - ldout(cct, 10) << "handle_quota " << *m << " from mds." << mds << dendl; - - vinodeno_t vino(m->ino, CEPH_NOSNAP); - if (inode_map.count(vino)) { - Inode *in = NULL; - in = inode_map[vino]; - - if (in) { - in->quota = m->quota; - in->rstat = m->rstat; - } - } - - m->put(); -} - -void Client::handle_caps(MClientCaps *m) -{ - mds_rank_t mds = mds_rank_t(m->get_source().num()); - MetaSession *session = _get_mds_session(mds, m->get_connection().get()); - if (!session) { - m->put(); - return; - } - - if (m->osd_epoch_barrier && !objecter->have_map(m->osd_epoch_barrier)) { - // Pause RADOS operations until we see the required epoch - objecter->set_epoch_barrier(m->osd_epoch_barrier); - } - - if (m->osd_epoch_barrier > cap_epoch_barrier) { - // Record the barrier so that we will transmit it to MDS when releasing - set_cap_epoch_barrier(m->osd_epoch_barrier); - } - - got_mds_push(session); - - m->clear_payload(); // for if/when we send back to MDS - - Inode *in = 0; - vinodeno_t vino(m->get_ino(), CEPH_NOSNAP); - if (inode_map.count(vino)) - in = inode_map[vino]; - if (!in) { - if (m->get_op() == CEPH_CAP_OP_IMPORT) { - ldout(cct, 5) << "handle_caps don't have vino " << vino << " on IMPORT, immediately releasing" << dendl; - session->enqueue_cap_release( - m->get_ino(), - m->get_cap_id(), - m->get_seq(), - m->get_mseq(), - cap_epoch_barrier); - } else { - ldout(cct, 5) << "handle_caps don't have vino " << vino << ", dropping" << dendl; - } - m->put(); - - // in case the mds is waiting on e.g. a revocation - flush_cap_releases(); - return; - } - - switch (m->get_op()) { - case CEPH_CAP_OP_EXPORT: - return handle_cap_export(session, in, m); - case CEPH_CAP_OP_FLUSHSNAP_ACK: - return handle_cap_flushsnap_ack(session, in, m); - case CEPH_CAP_OP_IMPORT: - handle_cap_import(session, in, m); - } - - if (in->caps.count(mds) == 0) { - ldout(cct, 5) << "handle_caps don't have " << *in << " cap on mds." << mds << dendl; - m->put(); - return; - } - - Cap *cap = in->caps[mds]; - - switch (m->get_op()) { - case CEPH_CAP_OP_TRUNC: return handle_cap_trunc(session, in, m); - case CEPH_CAP_OP_IMPORT: - case CEPH_CAP_OP_REVOKE: - case CEPH_CAP_OP_GRANT: return handle_cap_grant(session, in, cap, m); - case CEPH_CAP_OP_FLUSH_ACK: return handle_cap_flush_ack(session, in, cap, m); - default: - m->put(); - } -} - -void Client::handle_cap_import(MetaSession *session, Inode *in, MClientCaps *m) -{ - mds_rank_t mds = session->mds_num; - - ldout(cct, 5) << "handle_cap_import ino " << m->get_ino() << " mseq " << m->get_mseq() - << " IMPORT from mds." << mds << dendl; - - const mds_rank_t peer_mds = mds_rank_t(m->peer.mds); - Cap *cap = NULL; - UserPerm cap_perms; - if (m->peer.cap_id && in->caps.count(peer_mds)) { - cap = in->caps[peer_mds]; - if (cap) { - cap_perms = cap->latest_perms; - } - } - - // add/update it - SnapRealm *realm = NULL; - update_snap_trace(m->snapbl, &realm); - - add_update_cap(in, session, m->get_cap_id(), - m->get_caps(), m->get_seq(), m->get_mseq(), m->get_realm(), - CEPH_CAP_FLAG_AUTH, cap_perms); - - if (cap && cap->cap_id == m->peer.cap_id) { - remove_cap(cap, (m->peer.flags & CEPH_CAP_FLAG_RELEASE)); - } - - if (realm) - put_snap_realm(realm); - - if (in->auth_cap && in->auth_cap->session->mds_num == mds) { - // reflush any/all caps (if we are now the auth_cap) - if (in->cap_snaps.size()) - flush_snaps(in, true); - if (in->flushing_caps) - flush_caps(in, session); - } -} - -void Client::handle_cap_export(MetaSession *session, Inode *in, MClientCaps *m) -{ - mds_rank_t mds = session->mds_num; - - ldout(cct, 5) << "handle_cap_export ino " << m->get_ino() << " mseq " << m->get_mseq() - << " EXPORT from mds." << mds << dendl; - - Cap *cap = NULL; - if (in->caps.count(mds)) - cap = in->caps[mds]; - - const mds_rank_t peer_mds = mds_rank_t(m->peer.mds); - - if (cap && cap->cap_id == m->get_cap_id()) { - if (m->peer.cap_id) { - MetaSession *tsession = _get_or_open_mds_session(peer_mds); - if (in->caps.count(peer_mds)) { - Cap *tcap = in->caps[peer_mds]; - if (tcap->cap_id == m->peer.cap_id && - ceph_seq_cmp(tcap->seq, m->peer.seq) < 0) { - tcap->cap_id = m->peer.cap_id; - tcap->seq = m->peer.seq - 1; - tcap->issue_seq = tcap->seq; - tcap->mseq = m->peer.mseq; - tcap->issued |= cap->issued; - tcap->implemented |= cap->issued; - if (cap == in->auth_cap) - in->auth_cap = tcap; - if (in->auth_cap == tcap && in->flushing_cap_item.is_on_list()) - adjust_session_flushing_caps(in, session, tsession); - } - } else { - add_update_cap(in, tsession, m->peer.cap_id, cap->issued, - m->peer.seq - 1, m->peer.mseq, (uint64_t)-1, - cap == in->auth_cap ? CEPH_CAP_FLAG_AUTH : 0, - cap->latest_perms); - } - } else { - if (cap == in->auth_cap) - in->flags |= I_CAP_DROPPED; - } - - remove_cap(cap, false); - } - - m->put(); -} - -void Client::handle_cap_trunc(MetaSession *session, Inode *in, MClientCaps *m) -{ - mds_rank_t mds = session->mds_num; - assert(in->caps[mds]); - - ldout(cct, 10) << "handle_cap_trunc on ino " << *in - << " size " << in->size << " -> " << m->get_size() - << dendl; - - int implemented = 0; - int issued = in->caps_issued(&implemented) | in->caps_dirty(); - issued |= implemented; - update_inode_file_bits(in, m->get_truncate_seq(), m->get_truncate_size(), - m->get_size(), m->get_change_attr(), m->get_time_warp_seq(), - m->get_ctime(), m->get_mtime(), m->get_atime(), - m->inline_version, m->inline_data, issued); - m->put(); -} - -void Client::handle_cap_flush_ack(MetaSession *session, Inode *in, Cap *cap, MClientCaps *m) -{ - ceph_tid_t flush_ack_tid = m->get_client_tid(); - int dirty = m->get_dirty(); - int cleaned = 0; - int flushed = 0; - - for (map::iterator it = in->flushing_cap_tids.begin(); - it != in->flushing_cap_tids.end(); ) { - if (it->first == flush_ack_tid) - cleaned = it->second; - if (it->first <= flush_ack_tid) { - session->flushing_caps_tids.erase(it->first); - in->flushing_cap_tids.erase(it++); - ++flushed; - continue; - } - cleaned &= ~it->second; - if (!cleaned) - break; - ++it; - } - - ldout(cct, 5) << "handle_cap_flush_ack mds." << session->mds_num - << " cleaned " << ccap_string(cleaned) << " on " << *in - << " with " << ccap_string(dirty) << dendl; - - if (flushed) { - signal_cond_list(in->waitfor_caps); - if (session->flushing_caps_tids.empty() || - *session->flushing_caps_tids.begin() > flush_ack_tid) - sync_cond.Signal(); - } - - if (!dirty) { - in->cap_dirtier_uid = -1; - in->cap_dirtier_gid = -1; - } - - if (!cleaned) { - ldout(cct, 10) << " tid " << m->get_client_tid() << " != any cap bit tids" << dendl; - } else { - if (in->flushing_caps) { - ldout(cct, 5) << " flushing_caps " << ccap_string(in->flushing_caps) - << " -> " << ccap_string(in->flushing_caps & ~cleaned) << dendl; - in->flushing_caps &= ~cleaned; - if (in->flushing_caps == 0) { - ldout(cct, 10) << " " << *in << " !flushing" << dendl; - num_flushing_caps--; - if (in->cap_snaps.empty()) - in->flushing_cap_item.remove_myself(); - } - if (!in->caps_dirty()) - put_inode(in); - } - } - - m->put(); -} - - -void Client::handle_cap_flushsnap_ack(MetaSession *session, Inode *in, MClientCaps *m) -{ - mds_rank_t mds = session->mds_num; - assert(in->caps[mds]); - snapid_t follows = m->get_snap_follows(); - - if (in->cap_snaps.count(follows)) { - CapSnap &capsnap = in->cap_snaps.at(follows); - if (m->get_client_tid() != capsnap.flush_tid) { - ldout(cct, 10) << " tid " << m->get_client_tid() << " != " << capsnap.flush_tid << dendl; - } else { - ldout(cct, 5) << "handle_cap_flushedsnap mds." << mds << " flushed snap follows " << follows - << " on " << *in << dendl; - InodeRef tmp_ref; - if (in->get_num_ref() == 1) - tmp_ref = in; // make sure inode not get freed while erasing item from in->cap_snaps - if (in->flushing_caps == 0 && in->cap_snaps.empty()) - in->flushing_cap_item.remove_myself(); - session->flushing_caps_tids.erase(capsnap.flush_tid); - in->cap_snaps.erase(follows); - } - } else { - ldout(cct, 5) << "handle_cap_flushedsnap DUP(?) mds." << mds << " flushed snap follows " << follows - << " on " << *in << dendl; - // we may not have it if we send multiple FLUSHSNAP requests and (got multiple FLUSHEDSNAPs back) - } - - m->put(); -} - -class C_Client_DentryInvalidate : public Context { -private: - Client *client; - vinodeno_t dirino; - vinodeno_t ino; - string name; -public: - C_Client_DentryInvalidate(Client *c, Dentry *dn, bool del) : - client(c), name(dn->name) { - if (client->use_faked_inos()) { - dirino.ino = dn->dir->parent_inode->faked_ino; - if (del) - ino.ino = dn->inode->faked_ino; - } else { - dirino = dn->dir->parent_inode->vino(); - if (del) - ino = dn->inode->vino(); - } - if (!del) - ino.ino = inodeno_t(); - } - void finish(int r) override { - // _async_dentry_invalidate is responsible for its own locking - assert(!client->client_lock.is_locked_by_me()); - client->_async_dentry_invalidate(dirino, ino, name); - } -}; - -void Client::_async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name) -{ - if (unmounting) - return; - ldout(cct, 10) << "_async_dentry_invalidate '" << name << "' ino " << ino - << " in dir " << dirino << dendl; - dentry_invalidate_cb(callback_handle, dirino, ino, name); -} - -void Client::_schedule_invalidate_dentry_callback(Dentry *dn, bool del) -{ - if (dentry_invalidate_cb && dn->inode->ll_ref > 0) - async_dentry_invalidator.queue(new C_Client_DentryInvalidate(this, dn, del)); -} - -void Client::_try_to_trim_inode(Inode *in, bool sched_inval) -{ - int ref = in->get_num_ref(); - - if (in->dir && !in->dir->dentries.empty()) { - for (auto p = in->dir->dentries.begin(); - p != in->dir->dentries.end(); ) { - Dentry *dn = p->second; - ++p; - /* rmsnap removes whole subtree, need trim inodes recursively. - * we don't need to invalidate dentries recursively. because - * invalidating a directory dentry effectively invalidate - * whole subtree */ - if (in->snapid != CEPH_NOSNAP && dn->inode && dn->inode->is_dir()) - _try_to_trim_inode(dn->inode.get(), false); - - if (dn->lru_is_expireable()) - unlink(dn, true, false); // keep dir, drop dentry - } - if (in->dir->dentries.empty()) { - close_dir(in->dir); - --ref; - } - } - - if (ref > 0 && (in->flags & I_SNAPDIR_OPEN)) { - InodeRef snapdir = open_snapdir(in); - _try_to_trim_inode(snapdir.get(), false); - --ref; - } - - if (ref > 0 && in->ll_ref > 0 && sched_inval) { - set::iterator q = in->dn_set.begin(); - while (q != in->dn_set.end()) { - Dentry *dn = *q++; - // FIXME: we play lots of unlink/link tricks when handling MDS replies, - // so in->dn_set doesn't always reflect the state of kernel's dcache. - _schedule_invalidate_dentry_callback(dn, true); - unlink(dn, true, true); - } - } -} - -void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClientCaps *m) -{ - mds_rank_t mds = session->mds_num; - int used = get_caps_used(in); - int wanted = in->caps_wanted(); - - const int old_caps = cap->issued; - const int new_caps = m->get_caps(); - ldout(cct, 5) << "handle_cap_grant on in " << m->get_ino() - << " mds." << mds << " seq " << m->get_seq() - << " caps now " << ccap_string(new_caps) - << " was " << ccap_string(old_caps) << dendl; - cap->seq = m->get_seq(); - - in->layout = m->get_layout(); - - // update inode - int implemented = 0; - int issued = in->caps_issued(&implemented) | in->caps_dirty(); - issued |= implemented; - - if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { - in->mode = m->head.mode; - in->uid = m->head.uid; - in->gid = m->head.gid; - in->btime = m->btime; - } - bool deleted_inode = false; - if ((issued & CEPH_CAP_LINK_EXCL) == 0) { - in->nlink = m->head.nlink; - if (in->nlink == 0 && - (new_caps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) - deleted_inode = true; - } - if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && - m->xattrbl.length() && - m->head.xattr_version > in->xattr_version) { - bufferlist::iterator p = m->xattrbl.begin(); - ::decode(in->xattrs, p); - in->xattr_version = m->head.xattr_version; - } - update_inode_file_bits(in, m->get_truncate_seq(), m->get_truncate_size(), m->get_size(), - m->get_change_attr(), m->get_time_warp_seq(), m->get_ctime(), - m->get_mtime(), m->get_atime(), - m->inline_version, m->inline_data, issued); - - // max_size - if (cap == in->auth_cap && - m->get_max_size() != in->max_size) { - ldout(cct, 10) << "max_size " << in->max_size << " -> " << m->get_max_size() << dendl; - in->max_size = m->get_max_size(); - if (in->max_size > in->wanted_max_size) { - in->wanted_max_size = 0; - in->requested_max_size = 0; - } - } - - bool check = false; - if (m->get_op() == CEPH_CAP_OP_IMPORT && m->get_wanted() != wanted) - check = true; - - check_cap_issue(in, cap, new_caps); - - // update caps - if (old_caps & ~new_caps) { - ldout(cct, 10) << " revocation of " << ccap_string(~new_caps & old_caps) << dendl; - cap->issued = new_caps; - cap->implemented |= new_caps; - - if (((used & ~new_caps) & CEPH_CAP_FILE_BUFFER) - && !_flush(in, new C_Client_FlushComplete(this, in))) { - // waitin' for flush - } else if ((old_caps & ~new_caps) & CEPH_CAP_FILE_CACHE) { - if (_release(in)) - check = true; - } else { - cap->wanted = 0; // don't let check_caps skip sending a response to MDS - check = true; - } - - } else if (old_caps == new_caps) { - ldout(cct, 10) << " caps unchanged at " << ccap_string(old_caps) << dendl; - } else { - ldout(cct, 10) << " grant, new caps are " << ccap_string(new_caps & ~old_caps) << dendl; - cap->issued = new_caps; - cap->implemented |= new_caps; - - if (cap == in->auth_cap) { - // non-auth MDS is revoking the newly grant caps ? - for (map::iterator it = in->caps.begin(); it != in->caps.end(); ++it) { - if (it->second == cap) - continue; - if (it->second->implemented & ~it->second->issued & new_caps) { - check = true; - break; - } - } - } - } - - if (check) - check_caps(in, 0); - - // wake up waiters - if (new_caps) - signal_cond_list(in->waitfor_caps); - - // may drop inode's last ref - if (deleted_inode) - _try_to_trim_inode(in, true); - - m->put(); -} - -int Client::_getgrouplist(gid_t** sgids, uid_t uid, gid_t gid) -{ - // cppcheck-suppress variableScope - int sgid_count; - gid_t *sgid_buf; - - if (getgroups_cb) { - sgid_count = getgroups_cb(callback_handle, &sgid_buf); - if (sgid_count > 0) { - *sgids = sgid_buf; - return sgid_count; - } - } - -#if HAVE_GETGROUPLIST - struct passwd *pw; - pw = getpwuid(uid); - if (pw == NULL) { - ldout(cct, 3) << "getting user entry failed" << dendl; - return -errno; - } - //use PAM to get the group list - // initial number of group entries, defaults to posix standard of 16 - // PAM implementations may provide more than 16 groups.... - sgid_count = 16; - sgid_buf = (gid_t*)malloc(sgid_count * sizeof(gid_t)); - if (sgid_buf == NULL) { - ldout(cct, 3) << "allocating group memory failed" << dendl; - return -ENOMEM; - } - - while (1) { -#if defined(__APPLE__) - if (getgrouplist(pw->pw_name, gid, (int*)sgid_buf, &sgid_count) == -1) { -#else - if (getgrouplist(pw->pw_name, gid, sgid_buf, &sgid_count) == -1) { -#endif - // we need to resize the group list and try again - void *_realloc = NULL; - if ((_realloc = realloc(sgid_buf, sgid_count * sizeof(gid_t))) == NULL) { - ldout(cct, 3) << "allocating group memory failed" << dendl; - free(sgid_buf); - return -ENOMEM; - } - sgid_buf = (gid_t*)_realloc; - continue; - } - // list was successfully retrieved - break; - } - *sgids = sgid_buf; - return sgid_count; -#else - return 0; -#endif -} - -int Client::inode_permission(Inode *in, const UserPerm& perms, unsigned want) -{ - if (perms.uid() == 0) - return 0; - - if (perms.uid() != in->uid && (in->mode & S_IRWXG)) { - int ret = _posix_acl_permission(in, perms, want); - if (ret != -EAGAIN) - return ret; - } - - // check permissions before doing anything else - if (!in->check_mode(perms, want)) - return -EACCES; - return 0; -} - -int Client::xattr_permission(Inode *in, const char *name, unsigned want, - const UserPerm& perms) -{ - int r = _getattr_for_perm(in, perms); - if (r < 0) - goto out; - - r = 0; - if (strncmp(name, "system.", 7) == 0) { - if ((want & MAY_WRITE) && (perms.uid() != 0 && perms.uid() != in->uid)) - r = -EPERM; - } else { - r = inode_permission(in, perms, want); - } -out: - ldout(cct, 3) << __func__ << " " << in << " = " << r << dendl; - return r; -} - -ostream& operator<<(ostream &out, const UserPerm& perm) { - out << "UserPerm(uid: " << perm.uid() << ", gid: " << perm.gid() << ")"; - return out; -} - -int Client::may_setattr(Inode *in, struct ceph_statx *stx, int mask, - const UserPerm& perms) -{ - ldout(cct, 20) << __func__ << " " << *in << "; " << perms << dendl; - int r = _getattr_for_perm(in, perms); - if (r < 0) - goto out; - - if (mask & CEPH_SETATTR_SIZE) { - r = inode_permission(in, perms, MAY_WRITE); - if (r < 0) - goto out; - } - - r = -EPERM; - if (mask & CEPH_SETATTR_UID) { - if (perms.uid() != 0 && (perms.uid() != in->uid || stx->stx_uid != in->uid)) - goto out; - } - if (mask & CEPH_SETATTR_GID) { - if (perms.uid() != 0 && (perms.uid() != in->uid || - (!perms.gid_in_groups(stx->stx_gid) && stx->stx_gid != in->gid))) - goto out; - } - - if (mask & CEPH_SETATTR_MODE) { - if (perms.uid() != 0 && perms.uid() != in->uid) - goto out; - - gid_t i_gid = (mask & CEPH_SETATTR_GID) ? stx->stx_gid : in->gid; - if (perms.uid() != 0 && !perms.gid_in_groups(i_gid)) - stx->stx_mode &= ~S_ISGID; - } - - if (mask & (CEPH_SETATTR_CTIME | CEPH_SETATTR_BTIME | - CEPH_SETATTR_MTIME | CEPH_SETATTR_ATIME)) { - if (perms.uid() != 0 && perms.uid() != in->uid) { - int check_mask = CEPH_SETATTR_CTIME | CEPH_SETATTR_BTIME; - if (!(mask & CEPH_SETATTR_MTIME_NOW)) - check_mask |= CEPH_SETATTR_MTIME; - if (!(mask & CEPH_SETATTR_ATIME_NOW)) - check_mask |= CEPH_SETATTR_ATIME; - if (check_mask & mask) { - goto out; - } else { - r = inode_permission(in, perms, MAY_WRITE); - if (r < 0) - goto out; - } - } - } - r = 0; -out: - ldout(cct, 3) << __func__ << " " << in << " = " << r << dendl; - return r; -} - -int Client::may_open(Inode *in, int flags, const UserPerm& perms) -{ - ldout(cct, 20) << __func__ << " " << *in << "; " << perms << dendl; - unsigned want = 0; - - if ((flags & O_ACCMODE) == O_WRONLY) - want = MAY_WRITE; - else if ((flags & O_ACCMODE) == O_RDWR) - want = MAY_READ | MAY_WRITE; - else if ((flags & O_ACCMODE) == O_RDONLY) - want = MAY_READ; - if (flags & O_TRUNC) - want |= MAY_WRITE; - - int r = 0; - switch (in->mode & S_IFMT) { - case S_IFLNK: - r = -ELOOP; - goto out; - case S_IFDIR: - if (want & MAY_WRITE) { - r = -EISDIR; - goto out; - } - break; - } - - r = _getattr_for_perm(in, perms); - if (r < 0) - goto out; - - r = inode_permission(in, perms, want); -out: - ldout(cct, 3) << __func__ << " " << in << " = " << r << dendl; - return r; -} - -int Client::may_lookup(Inode *dir, const UserPerm& perms) -{ - ldout(cct, 20) << __func__ << " " << *dir << "; " << perms << dendl; - int r = _getattr_for_perm(dir, perms); - if (r < 0) - goto out; - - r = inode_permission(dir, perms, MAY_EXEC); -out: - ldout(cct, 3) << __func__ << " " << dir << " = " << r << dendl; - return r; -} - -int Client::may_create(Inode *dir, const UserPerm& perms) -{ - ldout(cct, 20) << __func__ << " " << *dir << "; " << perms << dendl; - int r = _getattr_for_perm(dir, perms); - if (r < 0) - goto out; - - r = inode_permission(dir, perms, MAY_EXEC | MAY_WRITE); -out: - ldout(cct, 3) << __func__ << " " << dir << " = " << r << dendl; - return r; -} - -int Client::may_delete(Inode *dir, const char *name, const UserPerm& perms) -{ - ldout(cct, 20) << __func__ << " " << *dir << "; " << "; name " << name << "; " << perms << dendl; - int r = _getattr_for_perm(dir, perms); - if (r < 0) - goto out; - - r = inode_permission(dir, perms, MAY_EXEC | MAY_WRITE); - if (r < 0) - goto out; - - /* 'name == NULL' means rmsnap */ - if (perms.uid() != 0 && name && (dir->mode & S_ISVTX)) { - InodeRef otherin; - r = _lookup(dir, name, CEPH_CAP_AUTH_SHARED, &otherin, perms); - if (r < 0) - goto out; - if (dir->uid != perms.uid() && otherin->uid != perms.uid()) - r = -EPERM; - } -out: - ldout(cct, 3) << __func__ << " " << dir << " = " << r << dendl; - return r; -} - -int Client::may_hardlink(Inode *in, const UserPerm& perms) -{ - ldout(cct, 20) << __func__ << " " << *in << "; " << perms << dendl; - int r = _getattr_for_perm(in, perms); - if (r < 0) - goto out; - - if (perms.uid() == 0 || perms.uid() == in->uid) { - r = 0; - goto out; - } - - r = -EPERM; - if (!S_ISREG(in->mode)) - goto out; - - if (in->mode & S_ISUID) - goto out; - - if ((in->mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) - goto out; - - r = inode_permission(in, perms, MAY_READ | MAY_WRITE); -out: - ldout(cct, 3) << __func__ << " " << in << " = " << r << dendl; - return r; -} - -int Client::_getattr_for_perm(Inode *in, const UserPerm& perms) -{ - int mask = CEPH_STAT_CAP_MODE; - bool force = false; - if (acl_type != NO_ACL) { - mask |= CEPH_STAT_CAP_XATTR; - force = in->xattr_version == 0; - } - return _getattr(in, mask, perms, force); -} - -vinodeno_t Client::_get_vino(Inode *in) -{ - /* The caller must hold the client lock */ - return vinodeno_t(in->ino, in->snapid); -} - -inodeno_t Client::_get_inodeno(Inode *in) -{ - /* The caller must hold the client lock */ - return in->ino; -} - - -/** - * Resolve an MDS spec to a list of MDS daemon GIDs. - * - * The spec is a string representing a GID, rank, filesystem:rank, or name/id. - * It may be '*' in which case it matches all GIDs. - * - * If no error is returned, the `targets` vector will be populated with at least - * one MDS. - */ -int Client::resolve_mds( - const std::string &mds_spec, - std::vector *targets) -{ - assert(fsmap); - assert(targets != nullptr); - - mds_role_t role; - std::stringstream ss; - int role_r = fsmap->parse_role(mds_spec, &role, ss); - if (role_r == 0) { - // We got a role, resolve it to a GID - ldout(cct, 10) << __func__ << ": resolved '" << mds_spec << "' to role '" - << role << "'" << dendl; - targets->push_back( - fsmap->get_filesystem(role.fscid)->mds_map.get_info(role.rank).global_id); - return 0; - } - - std::string strtol_err; - long long rank_or_gid = strict_strtoll(mds_spec.c_str(), 10, &strtol_err); - if (strtol_err.empty()) { - // It is a possible GID - const mds_gid_t mds_gid = mds_gid_t(rank_or_gid); - if (fsmap->gid_exists(mds_gid)) { - ldout(cct, 10) << __func__ << ": validated GID " << mds_gid << dendl; - targets->push_back(mds_gid); - } else { - lderr(cct) << __func__ << ": GID " << mds_gid << " not in MDS map" - << dendl; - return -ENOENT; - } - } else if (mds_spec == "*") { - // It is a wildcard: use all MDSs - const auto mds_info = fsmap->get_mds_info(); - - if (mds_info.empty()) { - lderr(cct) << __func__ << ": * passed but no MDS daemons found" << dendl; - return -ENOENT; - } - - for (const auto i : mds_info) { - targets->push_back(i.first); - } - } else { - // It did not parse as an integer, it is not a wildcard, it must be a name - const mds_gid_t mds_gid = fsmap->find_mds_gid_by_name(mds_spec); - if (mds_gid == 0) { - lderr(cct) << "MDS ID '" << mds_spec << "' not found" << dendl; - - lderr(cct) << "FSMap: " << *fsmap << dendl; - - return -ENOENT; - } else { - ldout(cct, 10) << __func__ << ": resolved ID '" << mds_spec - << "' to GID " << mds_gid << dendl; - targets->push_back(mds_gid); - } - } - - return 0; -} - - -/** - * Authenticate with mon and establish global ID - */ -int Client::authenticate() -{ - assert(client_lock.is_locked_by_me()); - - if (monclient->is_authenticated()) { - return 0; - } - - client_lock.Unlock(); - int r = monclient->authenticate(cct->_conf->client_mount_timeout); - client_lock.Lock(); - if (r < 0) { - return r; - } - - whoami = monclient->get_global_id(); - messenger->set_myname(entity_name_t::CLIENT(whoami.v)); - - return 0; -} - -int Client::fetch_fsmap(bool user) -{ - int r; - // Retrieve FSMap to enable looking up daemon addresses. We need FSMap - // rather than MDSMap because no one MDSMap contains all the daemons, and - // a `tell` can address any daemon. - version_t fsmap_latest; - do { - C_SaferCond cond; - monclient->get_version("fsmap", &fsmap_latest, NULL, &cond); - client_lock.Unlock(); - r = cond.wait(); - client_lock.Lock(); - } while (r == -EAGAIN); - - if (r < 0) { - lderr(cct) << "Failed to learn FSMap version: " << cpp_strerror(r) << dendl; - return r; - } - - ldout(cct, 10) << __func__ << " learned FSMap version " << fsmap_latest << dendl; - - if (user) { - if (!fsmap_user || fsmap_user->get_epoch() < fsmap_latest) { - monclient->sub_want("fsmap.user", fsmap_latest, CEPH_SUBSCRIBE_ONETIME); - monclient->renew_subs(); - wait_on_list(waiting_for_fsmap); - } - assert(fsmap_user); - assert(fsmap_user->get_epoch() >= fsmap_latest); - } else { - if (!fsmap || fsmap->get_epoch() < fsmap_latest) { - monclient->sub_want("fsmap", fsmap_latest, CEPH_SUBSCRIBE_ONETIME); - monclient->renew_subs(); - wait_on_list(waiting_for_fsmap); - } - assert(fsmap); - assert(fsmap->get_epoch() >= fsmap_latest); - } - ldout(cct, 10) << __func__ << " finished waiting for FSMap version " - << fsmap_latest << dendl; - return 0; -} - -/** - * - * @mds_spec one of ID, rank, GID, "*" - * - */ -int Client::mds_command( - const std::string &mds_spec, - const vector& cmd, - const bufferlist& inbl, - bufferlist *outbl, - string *outs, - Context *onfinish) -{ - Mutex::Locker lock(client_lock); - - if (!initialized) - return -ENOTCONN; - - int r; - r = authenticate(); - if (r < 0) { - return r; - } - - r = fetch_fsmap(false); - if (r < 0) { - return r; - } - - // Look up MDS target(s) of the command - std::vector targets; - r = resolve_mds(mds_spec, &targets); - if (r < 0) { - return r; - } - - // If daemons are laggy, we won't send them commands. If all - // are laggy then we fail. - std::vector non_laggy; - for (const auto gid : targets) { - const auto info = fsmap->get_info_gid(gid); - if (!info.laggy()) { - non_laggy.push_back(gid); - } - } - if (non_laggy.size() == 0) { - *outs = "All targeted MDS daemons are laggy"; - return -ENOENT; - } - - if (metadata.empty()) { - // We are called on an unmounted client, so metadata - // won't be initialized yet. - populate_metadata(""); - } - - // Send commands to targets - C_GatherBuilder gather(cct, onfinish); - for (const auto target_gid : non_laggy) { - const auto info = fsmap->get_info_gid(target_gid); - - // Open a connection to the target MDS - entity_inst_t inst = info.get_inst(); - ConnectionRef conn = messenger->get_connection(inst); - - // Generate MDSCommandOp state - auto &op = command_table.start_command(); - - op.on_finish = gather.new_sub(); - op.cmd = cmd; - op.outbl = outbl; - op.outs = outs; - op.inbl = inbl; - op.mds_gid = target_gid; - op.con = conn; - - ldout(cct, 4) << __func__ << ": new command op to " << target_gid - << " tid=" << op.tid << cmd << dendl; - - // Construct and send MCommand - MCommand *m = op.get_message(monclient->get_fsid()); - conn->send_message(m); - } - gather.activate(); - - return 0; -} - -void Client::handle_command_reply(MCommandReply *m) -{ - ceph_tid_t const tid = m->get_tid(); - - ldout(cct, 10) << __func__ << ": tid=" << m->get_tid() << dendl; - - if (!command_table.exists(tid)) { - ldout(cct, 1) << __func__ << ": unknown tid " << tid << ", dropping" << dendl; - m->put(); - return; - } - - auto &op = command_table.get_command(tid); - if (op.outbl) { - op.outbl->claim(m->get_data()); - } - if (op.outs) { - *op.outs = m->rs; - } - - if (op.on_finish) { - op.on_finish->complete(m->r); - } - - command_table.erase(tid); - - m->put(); -} - -// ------------------- -// MOUNT - -int Client::mount(const std::string &mount_root, const UserPerm& perms, - bool require_mds) -{ - Mutex::Locker lock(client_lock); - - if (mounted) { - ldout(cct, 5) << "already mounted" << dendl; - return 0; - } - - int r = authenticate(); - if (r < 0) { - lderr(cct) << "authentication failed: " << cpp_strerror(r) << dendl; - return r; - } - - std::string want = "mdsmap"; - const auto &mds_ns = cct->_conf->client_mds_namespace; - if (!mds_ns.empty()) { - r = fetch_fsmap(true); - if (r < 0) - return r; - fs_cluster_id_t cid = fsmap_user->get_fs_cid(mds_ns); - if (cid == FS_CLUSTER_ID_NONE) - return -ENOENT; - - std::ostringstream oss; - oss << want << "." << cid; - want = oss.str(); - } - ldout(cct, 10) << "Subscribing to map '" << want << "'" << dendl; - - monclient->sub_want(want, 0, 0); - monclient->renew_subs(); - - tick(); // start tick - - if (require_mds) { - while (1) { - auto availability = mdsmap->is_cluster_available(); - if (availability == MDSMap::STUCK_UNAVAILABLE) { - // Error out - ldout(cct, 10) << "mds cluster unavailable: epoch=" << mdsmap->get_epoch() << dendl; - return CEPH_FUSE_NO_MDS_UP; - } else if (availability == MDSMap::AVAILABLE) { - // Continue to mount - break; - } else if (availability == MDSMap::TRANSIENT_UNAVAILABLE) { - // Else, wait. MDSMonitor will update the map to bring - // us to a conclusion eventually. - wait_on_list(waiting_for_mdsmap); - } else { - // Unexpected value! - ceph_abort(); - } - } - } - - populate_metadata(mount_root.empty() ? "/" : mount_root); - - filepath fp(CEPH_INO_ROOT); - if (!mount_root.empty()) { - fp = filepath(mount_root.c_str()); - } - while (true) { - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_GETATTR); - req->set_filepath(fp); - req->head.args.getattr.mask = CEPH_STAT_CAP_INODE_ALL; - int res = make_request(req, perms); - if (res < 0) { - if (res == -EACCES && root) { - ldout(cct, 1) << __func__ << " EACCES on parent of mount point; quotas may not work" << dendl; - break; - } - return res; - } - - if (fp.depth()) - fp.pop_dentry(); - else - break; - } - - assert(root); - _ll_get(root); - - mounted = true; - - // trace? - if (!cct->_conf->client_trace.empty()) { - traceout.open(cct->_conf->client_trace.c_str()); - if (traceout.is_open()) { - ldout(cct, 1) << "opened trace file '" << cct->_conf->client_trace << "'" << dendl; - } else { - ldout(cct, 1) << "FAILED to open trace file '" << cct->_conf->client_trace << "'" << dendl; - } - } - - /* - ldout(cct, 3) << "op: // client trace data structs" << dendl; - ldout(cct, 3) << "op: struct stat st;" << dendl; - ldout(cct, 3) << "op: struct utimbuf utim;" << dendl; - ldout(cct, 3) << "op: int readlinkbuf_len = 1000;" << dendl; - ldout(cct, 3) << "op: char readlinkbuf[readlinkbuf_len];" << dendl; - ldout(cct, 3) << "op: map dir_contents;" << dendl; - ldout(cct, 3) << "op: map open_files;" << dendl; - ldout(cct, 3) << "op: int fd;" << dendl; - */ - return 0; -} - -// UNMOUNT - -void Client::_close_sessions() -{ - while (!mds_sessions.empty()) { - // send session closes! - for (map::iterator p = mds_sessions.begin(); - p != mds_sessions.end(); - ++p) { - if (p->second->state != MetaSession::STATE_CLOSING) { - _close_mds_session(p->second); - } - } - - // wait for sessions to close - ldout(cct, 2) << "waiting for " << mds_sessions.size() << " mds sessions to close" << dendl; - mount_cond.Wait(client_lock); - } -} - -void Client::flush_mdlog_sync() -{ - if (mds_requests.empty()) - return; - for (map::iterator p = mds_sessions.begin(); - p != mds_sessions.end(); - ++p) { - MetaSession *s = p->second; - flush_mdlog(s); - } -} - -void Client::flush_mdlog(MetaSession *session) -{ - // Only send this to Luminous or newer MDS daemons, older daemons - // will crash if they see an unknown CEPH_SESSION_* value in this msg. - const uint64_t features = session->con->get_features(); - if (HAVE_FEATURE(features, SERVER_LUMINOUS)) { - MClientSession *m = new MClientSession(CEPH_SESSION_REQUEST_FLUSH_MDLOG); - session->con->send_message(m); - } -} - - -void Client::unmount() -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return; - - ldout(cct, 2) << "unmounting" << dendl; - unmounting = true; - - flush_mdlog_sync(); // flush the mdlog for pending requests, if any - while (!mds_requests.empty()) { - ldout(cct, 10) << "waiting on " << mds_requests.size() << " requests" << dendl; - mount_cond.Wait(client_lock); - } - - if (tick_event) - timer.cancel_event(tick_event); - tick_event = 0; - - cwd.reset(); - - // clean up any unclosed files - while (!fd_map.empty()) { - Fh *fh = fd_map.begin()->second; - fd_map.erase(fd_map.begin()); - ldout(cct, 0) << " destroyed lost open file " << fh << " on " << *fh->inode << dendl; - _release_fh(fh); - } - - while (!ll_unclosed_fh_set.empty()) { - set::iterator it = ll_unclosed_fh_set.begin(); - Fh *fh = *it; - ll_unclosed_fh_set.erase(fh); - ldout(cct, 0) << " destroyed lost open file " << fh << " on " << *(fh->inode) << dendl; - _release_fh(fh); - } - - while (!opened_dirs.empty()) { - dir_result_t *dirp = *opened_dirs.begin(); - ldout(cct, 0) << " destroyed lost open dir " << dirp << " on " << *dirp->inode << dendl; - _closedir(dirp); - } - - _ll_drop_pins(); - - if (blacklisted) { - ldout(cct, 0) << " skipping clean shutdown, we are blacklisted" << dendl; - - if (cct->_conf->client_oc) { - // Purge all cached data so that ObjectCacher doesn't get hung up - // trying to flush it. ObjectCacher's behaviour on EBLACKLISTED - // is to just leave things marked dirty - // (http://tracker.ceph.com/issues/9105) - for (const auto &i : inode_map) { - objectcacher->purge_set(&(i.second->oset)); - } - } - - mounted = false; - return; - } - - while (unsafe_sync_write > 0) { - ldout(cct, 0) << unsafe_sync_write << " unsafe_sync_writes, waiting" << dendl; - mount_cond.Wait(client_lock); - } - - if (cct->_conf->client_oc) { - // flush/release all buffered data - ceph::unordered_map::iterator next; - for (ceph::unordered_map::iterator p = inode_map.begin(); - p != inode_map.end(); - p = next) { - next = p; - ++next; - Inode *in = p->second; - if (!in) { - ldout(cct, 0) << "null inode_map entry ino " << p->first << dendl; - assert(in); - } - if (!in->caps.empty()) { - InodeRef tmp_ref(in); - _release(in); - _flush(in, new C_Client_FlushComplete(this, in)); - } - } - } - - flush_caps_sync(); - wait_sync_caps(last_flush_tid); - - // empty lru cache - trim_cache(); - - while (lru.lru_get_size() > 0 || - !inode_map.empty()) { - ldout(cct, 2) << "cache still has " << lru.lru_get_size() - << "+" << inode_map.size() << " items" - << ", waiting (for caps to release?)" - << dendl; - utime_t until = ceph_clock_now() + utime_t(5, 0); - int r = mount_cond.WaitUntil(client_lock, until); - if (r == ETIMEDOUT) { - dump_cache(NULL); - } - } - assert(lru.lru_get_size() == 0); - assert(inode_map.empty()); - - // stop tracing - if (!cct->_conf->client_trace.empty()) { - ldout(cct, 1) << "closing trace file '" << cct->_conf->client_trace << "'" << dendl; - traceout.close(); - } - - _close_sessions(); - - mounted = false; - - ldout(cct, 2) << "unmounted." << dendl; -} - -void Client::flush_cap_releases() -{ - // send any cap releases - for (map::iterator p = mds_sessions.begin(); - p != mds_sessions.end(); - ++p) { - if (p->second->release && mdsmap->is_clientreplay_or_active_or_stopping( - p->first)) { - if (cct->_conf->client_inject_release_failure) { - ldout(cct, 20) << __func__ << " injecting failure to send cap release message" << dendl; - p->second->release->put(); - } else { - p->second->con->send_message(p->second->release); - } - p->second->release = 0; - } - } -} - -void Client::tick() -{ - if (cct->_conf->client_debug_inject_tick_delay > 0) { - sleep(cct->_conf->client_debug_inject_tick_delay); - assert(0 == cct->_conf->set_val("client_debug_inject_tick_delay", "0")); - cct->_conf->apply_changes(NULL); - } - - ldout(cct, 21) << "tick" << dendl; - tick_event = timer.add_event_after( - cct->_conf->client_tick_interval, - new FunctionContext([this](int) { - // Called back via Timer, which takes client_lock for us - assert(client_lock.is_locked_by_me()); - tick(); - })); - utime_t now = ceph_clock_now(); - - if (!mounted && !mds_requests.empty()) { - MetaRequest *req = mds_requests.begin()->second; - if (req->op_stamp + cct->_conf->client_mount_timeout < now) { - req->abort(-ETIMEDOUT); - if (req->caller_cond) { - req->kick = true; - req->caller_cond->Signal(); - } - signal_cond_list(waiting_for_mdsmap); - for (map::iterator p = mds_sessions.begin(); - p != mds_sessions.end(); - ++p) - signal_context_list(p->second->waiting_for_open); - } - } - - if (mdsmap->get_epoch()) { - // renew caps? - utime_t el = now - last_cap_renew; - if (el > mdsmap->get_session_timeout() / 3.0) - renew_caps(); - - flush_cap_releases(); - } - - // delayed caps - xlist::iterator p = delayed_caps.begin(); - while (!p.end()) { - Inode *in = *p; - ++p; - if (in->hold_caps_until > now) - break; - delayed_caps.pop_front(); - cap_list.push_back(&in->cap_item); - check_caps(in, CHECK_CAPS_NODELAY); - } - - trim_cache(true); -} - -void Client::renew_caps() -{ - ldout(cct, 10) << "renew_caps()" << dendl; - last_cap_renew = ceph_clock_now(); - - for (map::iterator p = mds_sessions.begin(); - p != mds_sessions.end(); - ++p) { - ldout(cct, 15) << "renew_caps requesting from mds." << p->first << dendl; - if (mdsmap->get_state(p->first) >= MDSMap::STATE_REJOIN) - renew_caps(p->second); - } -} - -void Client::renew_caps(MetaSession *session) -{ - ldout(cct, 10) << "renew_caps mds." << session->mds_num << dendl; - session->last_cap_renew_request = ceph_clock_now(); - uint64_t seq = ++session->cap_renew_seq; - session->con->send_message(new MClientSession(CEPH_SESSION_REQUEST_RENEWCAPS, seq)); -} - - -// =============================================================== -// high level (POSIXy) interface - -int Client::_do_lookup(Inode *dir, const string& name, int mask, - InodeRef *target, const UserPerm& perms) -{ - int op = dir->snapid == CEPH_SNAPDIR ? CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; - MetaRequest *req = new MetaRequest(op); - filepath path; - dir->make_nosnap_relative_path(path); - path.push_dentry(name); - req->set_filepath(path); - req->set_inode(dir); - if (cct->_conf->client_debug_getattr_caps && op == CEPH_MDS_OP_LOOKUP) - mask |= DEBUG_GETATTR_CAPS; - req->head.args.getattr.mask = mask; - - ldout(cct, 10) << "_do_lookup on " << path << dendl; - - int r = make_request(req, perms, target); - ldout(cct, 10) << "_do_lookup res is " << r << dendl; - return r; -} - -int Client::_lookup(Inode *dir, const string& dname, int mask, InodeRef *target, - const UserPerm& perms) -{ - int r = 0; - Dentry *dn = NULL; - - if (!dir->is_dir()) { - r = -ENOTDIR; - goto done; - } - - if (dname == "..") { - if (dir->dn_set.empty()) - *target = dir; - else - *target = dir->get_first_parent()->dir->parent_inode; //dirs can't be hard-linked - goto done; - } - - if (dname == ".") { - *target = dir; - goto done; - } - - if (dname.length() > NAME_MAX) { - r = -ENAMETOOLONG; - goto done; - } - - if (dname == cct->_conf->client_snapdir && - dir->snapid == CEPH_NOSNAP) { - *target = open_snapdir(dir); - goto done; - } - - if (dir->dir && - dir->dir->dentries.count(dname)) { - dn = dir->dir->dentries[dname]; - - ldout(cct, 20) << "_lookup have dn " << dname << " mds." << dn->lease_mds << " ttl " << dn->lease_ttl - << " seq " << dn->lease_seq - << dendl; - - if (!dn->inode || dn->inode->caps_issued_mask(mask)) { - // is dn lease valid? - utime_t now = ceph_clock_now(); - if (dn->lease_mds >= 0 && - dn->lease_ttl > now && - mds_sessions.count(dn->lease_mds)) { - MetaSession *s = mds_sessions[dn->lease_mds]; - if (s->cap_ttl > now && - s->cap_gen == dn->lease_gen) { - // touch this mds's dir cap too, even though we don't _explicitly_ use it here, to - // make trim_caps() behave. - dir->try_touch_cap(dn->lease_mds); - goto hit_dn; - } - ldout(cct, 20) << " bad lease, cap_ttl " << s->cap_ttl << ", cap_gen " << s->cap_gen - << " vs lease_gen " << dn->lease_gen << dendl; - } - // dir lease? - if (dir->caps_issued_mask(CEPH_CAP_FILE_SHARED)) { - if (dn->cap_shared_gen == dir->shared_gen && - (!dn->inode || dn->inode->caps_issued_mask(mask))) - goto hit_dn; - if (!dn->inode && (dir->flags & I_COMPLETE)) { - ldout(cct, 10) << "_lookup concluded ENOENT locally for " - << *dir << " dn '" << dname << "'" << dendl; - return -ENOENT; - } - } - } else { - ldout(cct, 20) << " no cap on " << dn->inode->vino() << dendl; - } - } else { - // can we conclude ENOENT locally? - if (dir->caps_issued_mask(CEPH_CAP_FILE_SHARED) && - (dir->flags & I_COMPLETE)) { - ldout(cct, 10) << "_lookup concluded ENOENT locally for " << *dir << " dn '" << dname << "'" << dendl; - return -ENOENT; - } - } - - r = _do_lookup(dir, dname, mask, target, perms); - goto done; - - hit_dn: - if (dn->inode) { - *target = dn->inode; - } else { - r = -ENOENT; - } - touch_dn(dn); - - done: - if (r < 0) - ldout(cct, 10) << "_lookup " << *dir << " " << dname << " = " << r << dendl; - else - ldout(cct, 10) << "_lookup " << *dir << " " << dname << " = " << **target << dendl; - return r; -} - -int Client::get_or_create(Inode *dir, const char* name, - Dentry **pdn, bool expect_null) -{ - // lookup - ldout(cct, 20) << "get_or_create " << *dir << " name " << name << dendl; - dir->open_dir(); - if (dir->dir->dentries.count(name)) { - Dentry *dn = dir->dir->dentries[name]; - - // is dn lease valid? - utime_t now = ceph_clock_now(); - if (dn->inode && - dn->lease_mds >= 0 && - dn->lease_ttl > now && - mds_sessions.count(dn->lease_mds)) { - MetaSession *s = mds_sessions[dn->lease_mds]; - if (s->cap_ttl > now && - s->cap_gen == dn->lease_gen) { - if (expect_null) - return -EEXIST; - } - } - *pdn = dn; - } else { - // otherwise link up a new one - *pdn = link(dir->dir, name, NULL, NULL); - } - - // success - return 0; -} - -int Client::path_walk(const filepath& origpath, InodeRef *end, - const UserPerm& perms, bool followsym, int mask) -{ - filepath path = origpath; - InodeRef cur; - if (origpath.absolute()) - cur = root; - else - cur = cwd; - assert(cur); - - ldout(cct, 10) << "path_walk " << path << dendl; - - int symlinks = 0; - - unsigned i=0; - while (i < path.depth() && cur) { - int caps = 0; - const string &dname = path[i]; - ldout(cct, 10) << " " << i << " " << *cur << " " << dname << dendl; - ldout(cct, 20) << " (path is " << path << ")" << dendl; - InodeRef next; - if (cct->_conf->client_permissions) { - int r = may_lookup(cur.get(), perms); - if (r < 0) - return r; - caps = CEPH_CAP_AUTH_SHARED; - } - - /* Get extra requested caps on the last component */ - if (i == (path.depth() - 1)) - caps |= mask; - int r = _lookup(cur.get(), dname, caps, &next, perms); - if (r < 0) - return r; - // only follow trailing symlink if followsym. always follow - // 'directory' symlinks. - if (next && next->is_symlink()) { - symlinks++; - ldout(cct, 20) << " symlink count " << symlinks << ", value is '" << next->symlink << "'" << dendl; - if (symlinks > MAXSYMLINKS) { - return -ELOOP; - } - - if (i < path.depth() - 1) { - // dir symlink - // replace consumed components of path with symlink dir target - filepath resolved(next->symlink.c_str()); - resolved.append(path.postfixpath(i + 1)); - path = resolved; - i = 0; - if (next->symlink[0] == '/') { - cur = root; - } - continue; - } else if (followsym) { - if (next->symlink[0] == '/') { - path = next->symlink.c_str(); - i = 0; - // reset position - cur = root; - } else { - filepath more(next->symlink.c_str()); - // we need to remove the symlink component from off of the path - // before adding the target that the symlink points to. remain - // at the same position in the path. - path.pop_dentry(); - path.append(more); - } - continue; - } - } - cur.swap(next); - i++; - } - if (!cur) - return -ENOENT; - if (end) - end->swap(cur); - return 0; -} - - -// namespace ops - -int Client::link(const char *relexisting, const char *relpath, const UserPerm& perm) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "link" << std::endl; - tout(cct) << relexisting << std::endl; - tout(cct) << relpath << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath existing(relexisting); - - InodeRef in, dir; - int r = path_walk(existing, &in, perm, true); - if (r < 0) - return r; - if (std::string(relpath) == "/") { - r = -EEXIST; - return r; - } - filepath path(relpath); - string name = path.last_dentry(); - path.pop_dentry(); - - r = path_walk(path, &dir, perm, true); - if (r < 0) - return r; - if (cct->_conf->client_permissions) { - if (S_ISDIR(in->mode)) { - r = -EPERM; - return r; - } - r = may_hardlink(in.get(), perm); - if (r < 0) - return r; - r = may_create(dir.get(), perm); - if (r < 0) - return r; - } - r = _link(in.get(), dir.get(), name.c_str(), perm); - return r; -} - -int Client::unlink(const char *relpath, const UserPerm& perm) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "unlink" << std::endl; - tout(cct) << relpath << std::endl; - - if (unmounting) - return -ENOTCONN; - - if (std::string(relpath) == "/") - return -EISDIR; - - filepath path(relpath); - string name = path.last_dentry(); - path.pop_dentry(); - InodeRef dir; - int r = path_walk(path, &dir, perm); - if (r < 0) - return r; - if (cct->_conf->client_permissions) { - r = may_delete(dir.get(), name.c_str(), perm); - if (r < 0) - return r; - } - return _unlink(dir.get(), name.c_str(), perm); -} - -int Client::rename(const char *relfrom, const char *relto, const UserPerm& perm) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "rename" << std::endl; - tout(cct) << relfrom << std::endl; - tout(cct) << relto << std::endl; - - if (unmounting) - return -ENOTCONN; - - if (std::string(relfrom) == "/" || std::string(relto) == "/") - return -EBUSY; - - filepath from(relfrom); - filepath to(relto); - string fromname = from.last_dentry(); - from.pop_dentry(); - string toname = to.last_dentry(); - to.pop_dentry(); - - InodeRef fromdir, todir; - int r = path_walk(from, &fromdir, perm); - if (r < 0) - goto out; - r = path_walk(to, &todir, perm); - if (r < 0) - goto out; - - if (cct->_conf->client_permissions) { - int r = may_delete(fromdir.get(), fromname.c_str(), perm); - if (r < 0) - return r; - r = may_delete(todir.get(), toname.c_str(), perm); - if (r < 0 && r != -ENOENT) - return r; - } - r = _rename(fromdir.get(), fromname.c_str(), todir.get(), toname.c_str(), perm); -out: - return r; -} - -// dirs - -int Client::mkdir(const char *relpath, mode_t mode, const UserPerm& perm) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "mkdir" << std::endl; - tout(cct) << relpath << std::endl; - tout(cct) << mode << std::endl; - ldout(cct, 10) << "mkdir: " << relpath << dendl; - - if (unmounting) - return -ENOTCONN; - - if (std::string(relpath) == "/") - return -EEXIST; - - filepath path(relpath); - string name = path.last_dentry(); - path.pop_dentry(); - InodeRef dir; - int r = path_walk(path, &dir, perm); - if (r < 0) - return r; - if (cct->_conf->client_permissions) { - r = may_create(dir.get(), perm); - if (r < 0) - return r; - } - return _mkdir(dir.get(), name.c_str(), mode, perm); -} - -int Client::mkdirs(const char *relpath, mode_t mode, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 10) << "Client::mkdirs " << relpath << dendl; - tout(cct) << "mkdirs" << std::endl; - tout(cct) << relpath << std::endl; - tout(cct) << mode << std::endl; - - if (unmounting) - return -ENOTCONN; - - //get through existing parts of path - filepath path(relpath); - unsigned int i; - int r = 0, caps = 0; - InodeRef cur, next; - cur = cwd; - for (i=0; i_conf->client_permissions) { - r = may_lookup(cur.get(), perms); - if (r < 0) - break; - caps = CEPH_CAP_AUTH_SHARED; - } - r = _lookup(cur.get(), path[i].c_str(), caps, &next, perms); - if (r < 0) - break; - cur.swap(next); - } - //check that we have work left to do - if (i==path.depth()) return -EEXIST; - if (r!=-ENOENT) return r; - ldout(cct, 20) << "mkdirs got through " << i << " directories on path " << relpath << dendl; - //make new directory at each level - for (; i_conf->client_permissions) { - r = may_create(cur.get(), perms); - if (r < 0) - return r; - } - //make new dir - r = _mkdir(cur.get(), path[i].c_str(), mode, perms, &next); - - //check proper creation/existence - if(-EEXIST == r && i < path.depth() - 1) { - r = _lookup(cur.get(), path[i].c_str(), CEPH_CAP_AUTH_SHARED, &next, perms); - } - if (r < 0) - return r; - //move to new dir and continue - cur.swap(next); - ldout(cct, 20) << "mkdirs: successfully created directory " - << filepath(cur->ino).get_path() << dendl; - } - return 0; -} - -int Client::rmdir(const char *relpath, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "rmdir" << std::endl; - tout(cct) << relpath << std::endl; - - if (unmounting) - return -ENOTCONN; - - if (std::string(relpath) == "/") - return -EBUSY; - - filepath path(relpath); - string name = path.last_dentry(); - path.pop_dentry(); - InodeRef dir; - int r = path_walk(path, &dir, perms); - if (r < 0) - return r; - if (cct->_conf->client_permissions) { - int r = may_delete(dir.get(), name.c_str(), perms); - if (r < 0) - return r; - } - return _rmdir(dir.get(), name.c_str(), perms); -} - -int Client::mknod(const char *relpath, mode_t mode, const UserPerm& perms, dev_t rdev) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "mknod" << std::endl; - tout(cct) << relpath << std::endl; - tout(cct) << mode << std::endl; - tout(cct) << rdev << std::endl; - - if (unmounting) - return -ENOTCONN; - - if (std::string(relpath) == "/") - return -EEXIST; - - filepath path(relpath); - string name = path.last_dentry(); - path.pop_dentry(); - InodeRef dir; - int r = path_walk(path, &dir, perms); - if (r < 0) - return r; - if (cct->_conf->client_permissions) { - int r = may_create(dir.get(), perms); - if (r < 0) - return r; - } - return _mknod(dir.get(), name.c_str(), mode, rdev, perms); -} - -// symlinks - -int Client::symlink(const char *target, const char *relpath, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "symlink" << std::endl; - tout(cct) << target << std::endl; - tout(cct) << relpath << std::endl; - - if (unmounting) - return -ENOTCONN; - - if (std::string(relpath) == "/") - return -EEXIST; - - filepath path(relpath); - string name = path.last_dentry(); - path.pop_dentry(); - InodeRef dir; - int r = path_walk(path, &dir, perms); - if (r < 0) - return r; - if (cct->_conf->client_permissions) { - int r = may_create(dir.get(), perms); - if (r < 0) - return r; - } - return _symlink(dir.get(), name.c_str(), target, perms); -} - -int Client::readlink(const char *relpath, char *buf, loff_t size, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "readlink" << std::endl; - tout(cct) << relpath << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - int r = path_walk(path, &in, perms, false); - if (r < 0) - return r; - - return _readlink(in.get(), buf, size); -} - -int Client::_readlink(Inode *in, char *buf, size_t size) -{ - if (!in->is_symlink()) - return -EINVAL; - - // copy into buf (at most size bytes) - int r = in->symlink.length(); - if (r > (int)size) - r = size; - memcpy(buf, in->symlink.c_str(), r); - return r; -} - - -// inode stuff - -int Client::_getattr(Inode *in, int mask, const UserPerm& perms, bool force) -{ - bool yes = in->caps_issued_mask(mask); - - ldout(cct, 10) << "_getattr mask " << ccap_string(mask) << " issued=" << yes << dendl; - if (yes && !force) - return 0; - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_GETATTR); - filepath path; - in->make_nosnap_relative_path(path); - req->set_filepath(path); - req->set_inode(in); - req->head.args.getattr.mask = mask; - - int res = make_request(req, perms); - ldout(cct, 10) << "_getattr result=" << res << dendl; - return res; -} - -int Client::_do_setattr(Inode *in, struct ceph_statx *stx, int mask, - const UserPerm& perms, InodeRef *inp) -{ - int issued = in->caps_issued(); - - ldout(cct, 10) << "_setattr mask " << mask << " issued " << - ccap_string(issued) << dendl; - - if (in->snapid != CEPH_NOSNAP) { - return -EROFS; - } - if ((mask & CEPH_SETATTR_SIZE) && - (unsigned long)stx->stx_size > in->size && - is_quota_bytes_exceeded(in, (unsigned long)stx->stx_size - in->size, - perms)) { - return -EDQUOT; - } - - // make the change locally? - if ((in->cap_dirtier_uid >= 0 && perms.uid() != in->cap_dirtier_uid) || - (in->cap_dirtier_gid >= 0 && perms.gid() != in->cap_dirtier_gid)) { - ldout(cct, 10) << __func__ << " caller " << perms.uid() << ":" << perms.gid() - << " != cap dirtier " << in->cap_dirtier_uid << ":" - << in->cap_dirtier_gid << ", forcing sync setattr" - << dendl; - /* - * This works because we implicitly flush the caps as part of the - * request, so the cap update check will happen with the writeback - * cap context, and then the setattr check will happen with the - * caller's context. - * - * In reality this pattern is likely pretty rare (different users - * setattr'ing the same file). If that turns out not to be the - * case later, we can build a more complex pipelined cap writeback - * infrastructure... - */ - if (!mask) - mask |= CEPH_SETATTR_CTIME; - goto force_request; - } - - if (!mask) { - // caller just needs us to bump the ctime - in->ctime = ceph_clock_now(); - in->cap_dirtier_uid = perms.uid(); - in->cap_dirtier_gid = perms.gid(); - if (issued & CEPH_CAP_AUTH_EXCL) - mark_caps_dirty(in, CEPH_CAP_AUTH_EXCL); - else if (issued & CEPH_CAP_FILE_EXCL) - mark_caps_dirty(in, CEPH_CAP_FILE_EXCL); - else if (issued & CEPH_CAP_XATTR_EXCL) - mark_caps_dirty(in, CEPH_CAP_XATTR_EXCL); - else - mask |= CEPH_SETATTR_CTIME; - } - - if (in->caps_issued_mask(CEPH_CAP_AUTH_EXCL)) { - bool kill_sguid = mask & (CEPH_SETATTR_SIZE|CEPH_SETATTR_KILL_SGUID); - - mask &= ~CEPH_SETATTR_KILL_SGUID; - - if (mask & CEPH_SETATTR_UID) { - in->ctime = ceph_clock_now(); - in->cap_dirtier_uid = perms.uid(); - in->cap_dirtier_gid = perms.gid(); - in->uid = stx->stx_uid; - mark_caps_dirty(in, CEPH_CAP_AUTH_EXCL); - mask &= ~CEPH_SETATTR_UID; - kill_sguid = true; - ldout(cct,10) << "changing uid to " << stx->stx_uid << dendl; - } - if (mask & CEPH_SETATTR_GID) { - in->ctime = ceph_clock_now(); - in->cap_dirtier_uid = perms.uid(); - in->cap_dirtier_gid = perms.gid(); - in->gid = stx->stx_gid; - mark_caps_dirty(in, CEPH_CAP_AUTH_EXCL); - mask &= ~CEPH_SETATTR_GID; - kill_sguid = true; - ldout(cct,10) << "changing gid to " << stx->stx_gid << dendl; - } - - if (mask & CEPH_SETATTR_MODE) { - in->ctime = ceph_clock_now(); - in->cap_dirtier_uid = perms.uid(); - in->cap_dirtier_gid = perms.gid(); - in->mode = (in->mode & ~07777) | (stx->stx_mode & 07777); - mark_caps_dirty(in, CEPH_CAP_AUTH_EXCL); - mask &= ~CEPH_SETATTR_MODE; - ldout(cct,10) << "changing mode to " << stx->stx_mode << dendl; - } else if (kill_sguid && S_ISREG(in->mode) && (in->mode & (S_IXUSR|S_IXGRP|S_IXOTH))) { - /* Must squash the any setuid/setgid bits with an ownership change */ - in->mode &= ~(S_ISUID|S_ISGID); - mark_caps_dirty(in, CEPH_CAP_AUTH_EXCL); - } - - if (mask & CEPH_SETATTR_BTIME) { - in->ctime = ceph_clock_now(); - in->cap_dirtier_uid = perms.uid(); - in->cap_dirtier_gid = perms.gid(); - in->btime = utime_t(stx->stx_btime); - mark_caps_dirty(in, CEPH_CAP_AUTH_EXCL); - mask &= ~CEPH_SETATTR_BTIME; - ldout(cct,10) << "changing btime to " << in->btime << dendl; - } - } else if (mask & CEPH_SETATTR_SIZE) { - /* If we don't have Ax, then we must ask the server to clear them on truncate */ - mask |= CEPH_SETATTR_KILL_SGUID; - } - - if (in->caps_issued_mask(CEPH_CAP_FILE_EXCL)) { - if (mask & (CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME)) { - if (mask & CEPH_SETATTR_MTIME) - in->mtime = utime_t(stx->stx_mtime); - if (mask & CEPH_SETATTR_ATIME) - in->atime = utime_t(stx->stx_atime); - in->ctime = ceph_clock_now(); - in->cap_dirtier_uid = perms.uid(); - in->cap_dirtier_gid = perms.gid(); - in->time_warp_seq++; - mark_caps_dirty(in, CEPH_CAP_FILE_EXCL); - mask &= ~(CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME); - } - } - if (!mask) { - in->change_attr++; - return 0; - } - -force_request: - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_SETATTR); - - filepath path; - - in->make_nosnap_relative_path(path); - req->set_filepath(path); - req->set_inode(in); - - if (mask & CEPH_SETATTR_KILL_SGUID) { - req->inode_drop |= CEPH_CAP_AUTH_SHARED; - } - if (mask & CEPH_SETATTR_MODE) { - req->head.args.setattr.mode = stx->stx_mode; - req->inode_drop |= CEPH_CAP_AUTH_SHARED; - ldout(cct,10) << "changing mode to " << stx->stx_mode << dendl; - } - if (mask & CEPH_SETATTR_UID) { - req->head.args.setattr.uid = stx->stx_uid; - req->inode_drop |= CEPH_CAP_AUTH_SHARED; - ldout(cct,10) << "changing uid to " << stx->stx_uid << dendl; - } - if (mask & CEPH_SETATTR_GID) { - req->head.args.setattr.gid = stx->stx_gid; - req->inode_drop |= CEPH_CAP_AUTH_SHARED; - ldout(cct,10) << "changing gid to " << stx->stx_gid << dendl; - } - if (mask & CEPH_SETATTR_BTIME) { - req->head.args.setattr.btime = utime_t(stx->stx_btime); - req->inode_drop |= CEPH_CAP_AUTH_SHARED; - } - if (mask & CEPH_SETATTR_MTIME) { - req->head.args.setattr.mtime = utime_t(stx->stx_mtime); - req->inode_drop |= CEPH_CAP_AUTH_SHARED | CEPH_CAP_FILE_RD | - CEPH_CAP_FILE_WR; - } - if (mask & CEPH_SETATTR_ATIME) { - req->head.args.setattr.atime = utime_t(stx->stx_atime); - req->inode_drop |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD | - CEPH_CAP_FILE_WR; - } - if (mask & CEPH_SETATTR_SIZE) { - if ((unsigned long)stx->stx_size < mdsmap->get_max_filesize()) { - req->head.args.setattr.size = stx->stx_size; - ldout(cct,10) << "changing size to " << stx->stx_size << dendl; - } else { //too big! - put_request(req); - ldout(cct,10) << "unable to set size to " << stx->stx_size << ". Too large!" << dendl; - return -EFBIG; - } - req->inode_drop |= CEPH_CAP_AUTH_SHARED | CEPH_CAP_FILE_RD | - CEPH_CAP_FILE_WR; - } - req->head.args.setattr.mask = mask; - - req->regetattr_mask = mask; - - int res = make_request(req, perms, inp); - ldout(cct, 10) << "_setattr result=" << res << dendl; - return res; -} - -/* Note that we only care about attrs that setattr cares about */ -void Client::stat_to_statx(struct stat *st, struct ceph_statx *stx) -{ - stx->stx_size = st->st_size; - stx->stx_mode = st->st_mode; - stx->stx_uid = st->st_uid; - stx->stx_gid = st->st_gid; - stx->stx_mtime = st->st_mtim; - stx->stx_atime = st->st_atim; -} - -int Client::__setattrx(Inode *in, struct ceph_statx *stx, int mask, - const UserPerm& perms, InodeRef *inp) -{ - int ret = _do_setattr(in, stx, mask, perms, inp); - if (ret < 0) - return ret; - if (mask & CEPH_SETATTR_MODE) - ret = _posix_acl_chmod(in, stx->stx_mode, perms); - return ret; -} - -int Client::_setattrx(InodeRef &in, struct ceph_statx *stx, int mask, - const UserPerm& perms) -{ - mask &= (CEPH_SETATTR_MODE | CEPH_SETATTR_UID | - CEPH_SETATTR_GID | CEPH_SETATTR_MTIME | - CEPH_SETATTR_ATIME | CEPH_SETATTR_SIZE | - CEPH_SETATTR_CTIME | CEPH_SETATTR_BTIME); - if (cct->_conf->client_permissions) { - int r = may_setattr(in.get(), stx, mask, perms); - if (r < 0) - return r; - } - return __setattrx(in.get(), stx, mask, perms); -} - -int Client::_setattr(InodeRef &in, struct stat *attr, int mask, - const UserPerm& perms) -{ - struct ceph_statx stx; - - stat_to_statx(attr, &stx); - mask &= ~CEPH_SETATTR_BTIME; - - if ((mask & CEPH_SETATTR_UID) && attr->st_uid == static_cast(-1)) { - mask &= ~CEPH_SETATTR_UID; - } - if ((mask & CEPH_SETATTR_GID) && attr->st_gid == static_cast(-1)) { - mask &= ~CEPH_SETATTR_GID; - } - - return _setattrx(in, &stx, mask, perms); -} - -int Client::setattr(const char *relpath, struct stat *attr, int mask, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "setattr" << std::endl; - tout(cct) << relpath << std::endl; - tout(cct) << mask << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - int r = path_walk(path, &in, perms); - if (r < 0) - return r; - return _setattr(in, attr, mask, perms); -} - -int Client::setattrx(const char *relpath, struct ceph_statx *stx, int mask, - const UserPerm& perms, int flags) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "setattrx" << std::endl; - tout(cct) << relpath << std::endl; - tout(cct) << mask << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - int r = path_walk(path, &in, perms, !(flags & AT_SYMLINK_NOFOLLOW)); - if (r < 0) - return r; - return _setattrx(in, stx, mask, perms); -} - -int Client::fsetattr(int fd, struct stat *attr, int mask, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "fsetattr" << std::endl; - tout(cct) << fd << std::endl; - tout(cct) << mask << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; -#if defined(__linux__) && defined(O_PATH) - if (f->flags & O_PATH) - return -EBADF; -#endif - return _setattr(f->inode, attr, mask, perms); -} - -int Client::fsetattrx(int fd, struct ceph_statx *stx, int mask, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "fsetattr" << std::endl; - tout(cct) << fd << std::endl; - tout(cct) << mask << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; -#if defined(__linux__) && defined(O_PATH) - if (f->flags & O_PATH) - return -EBADF; -#endif - return _setattrx(f->inode, stx, mask, perms); -} - -int Client::stat(const char *relpath, struct stat *stbuf, const UserPerm& perms, - frag_info_t *dirstat, int mask) -{ - ldout(cct, 3) << "stat enter (relpath " << relpath << " mask " << mask << ")" << dendl; - Mutex::Locker lock(client_lock); - tout(cct) << "stat" << std::endl; - tout(cct) << relpath << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - int r = path_walk(path, &in, perms, true, mask); - if (r < 0) - return r; - r = _getattr(in, mask, perms); - if (r < 0) { - ldout(cct, 3) << "stat exit on error!" << dendl; - return r; - } - fill_stat(in, stbuf, dirstat); - ldout(cct, 3) << "stat exit (relpath " << relpath << " mask " << mask << ")" << dendl; - return r; -} - -unsigned Client::statx_to_mask(unsigned int flags, unsigned int want) -{ - unsigned mask = 0; - - /* if NO_ATTR_SYNC is set, then we don't need any -- just use what's in cache */ - if (flags & AT_NO_ATTR_SYNC) - goto out; - - /* Always set PIN to distinguish from AT_NO_ATTR_SYNC case */ - mask |= CEPH_CAP_PIN; - if (want & (CEPH_STATX_MODE|CEPH_STATX_UID|CEPH_STATX_GID|CEPH_STATX_BTIME|CEPH_STATX_CTIME|CEPH_STATX_VERSION)) - mask |= CEPH_CAP_AUTH_SHARED; - if (want & (CEPH_STATX_NLINK|CEPH_STATX_CTIME|CEPH_STATX_VERSION)) - mask |= CEPH_CAP_LINK_SHARED; - if (want & (CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS|CEPH_STATX_VERSION)) - mask |= CEPH_CAP_FILE_SHARED; - if (want & (CEPH_STATX_VERSION|CEPH_STATX_CTIME)) - mask |= CEPH_CAP_XATTR_SHARED; -out: - return mask; -} - -int Client::statx(const char *relpath, struct ceph_statx *stx, - const UserPerm& perms, - unsigned int want, unsigned int flags) -{ - ldout(cct, 3) << "statx enter (relpath " << relpath << " want " << want << ")" << dendl; - Mutex::Locker lock(client_lock); - tout(cct) << "statx" << std::endl; - tout(cct) << relpath << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - - unsigned mask = statx_to_mask(flags, want); - - int r = path_walk(path, &in, perms, !(flags & AT_SYMLINK_NOFOLLOW), mask); - if (r < 0) - return r; - - r = _getattr(in, mask, perms); - if (r < 0) { - ldout(cct, 3) << "statx exit on error!" << dendl; - return r; - } - - fill_statx(in, mask, stx); - ldout(cct, 3) << "statx exit (relpath " << relpath << " mask " << stx->stx_mask << ")" << dendl; - return r; -} - -int Client::lstat(const char *relpath, struct stat *stbuf, - const UserPerm& perms, frag_info_t *dirstat, int mask) -{ - ldout(cct, 3) << "lstat enter (relpath " << relpath << " mask " << mask << ")" << dendl; - Mutex::Locker lock(client_lock); - tout(cct) << "lstat" << std::endl; - tout(cct) << relpath << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - // don't follow symlinks - int r = path_walk(path, &in, perms, false, mask); - if (r < 0) - return r; - r = _getattr(in, mask, perms); - if (r < 0) { - ldout(cct, 3) << "lstat exit on error!" << dendl; - return r; - } - fill_stat(in, stbuf, dirstat); - ldout(cct, 3) << "lstat exit (relpath " << relpath << " mask " << mask << ")" << dendl; - return r; -} - -int Client::fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat, nest_info_t *rstat) -{ - ldout(cct, 10) << "fill_stat on " << in->ino << " snap/dev" << in->snapid - << " mode 0" << oct << in->mode << dec - << " mtime " << in->mtime << " ctime " << in->ctime << dendl; - memset(st, 0, sizeof(struct stat)); - if (use_faked_inos()) - st->st_ino = in->faked_ino; - else - st->st_ino = in->ino; - st->st_dev = in->snapid; - st->st_mode = in->mode; - st->st_rdev = in->rdev; - st->st_nlink = in->nlink; - st->st_uid = in->uid; - st->st_gid = in->gid; - if (in->ctime > in->mtime) { - stat_set_ctime_sec(st, in->ctime.sec()); - stat_set_ctime_nsec(st, in->ctime.nsec()); - } else { - stat_set_ctime_sec(st, in->mtime.sec()); - stat_set_ctime_nsec(st, in->mtime.nsec()); - } - stat_set_atime_sec(st, in->atime.sec()); - stat_set_atime_nsec(st, in->atime.nsec()); - stat_set_mtime_sec(st, in->mtime.sec()); - stat_set_mtime_nsec(st, in->mtime.nsec()); - if (in->is_dir()) { - if (cct->_conf->client_dirsize_rbytes) - st->st_size = in->rstat.rbytes; - else - st->st_size = in->dirstat.size(); - st->st_blocks = 1; - } else { - st->st_size = in->size; - st->st_blocks = (in->size + 511) >> 9; - } - st->st_blksize = MAX(in->layout.stripe_unit, 4096); - - if (dirstat) - *dirstat = in->dirstat; - if (rstat) - *rstat = in->rstat; - - return in->caps_issued(); -} - -void Client::fill_statx(Inode *in, unsigned int mask, struct ceph_statx *stx) -{ - ldout(cct, 10) << "fill_statx on " << in->ino << " snap/dev" << in->snapid - << " mode 0" << oct << in->mode << dec - << " mtime " << in->mtime << " ctime " << in->ctime << dendl; - memset(stx, 0, sizeof(struct ceph_statx)); - - /* - * If mask is 0, then the caller set AT_NO_ATTR_SYNC. Reset the mask - * so that all bits are set. - */ - if (!mask) - mask = ~0; - - /* These are always considered to be available */ - stx->stx_dev = in->snapid; - stx->stx_blksize = MAX(in->layout.stripe_unit, 4096); - - /* Type bits are always set, even when CEPH_STATX_MODE is not */ - stx->stx_mode = S_IFMT & in->mode; - stx->stx_ino = use_faked_inos() ? in->faked_ino : (ino_t)in->ino; - stx->stx_rdev = in->rdev; - stx->stx_mask |= (CEPH_STATX_INO|CEPH_STATX_RDEV); - - if (mask & CEPH_CAP_AUTH_SHARED) { - stx->stx_uid = in->uid; - stx->stx_gid = in->gid; - stx->stx_mode = in->mode; - in->btime.to_timespec(&stx->stx_btime); - stx->stx_mask |= (CEPH_STATX_MODE|CEPH_STATX_UID|CEPH_STATX_GID|CEPH_STATX_BTIME); - } - - if (mask & CEPH_CAP_LINK_SHARED) { - stx->stx_nlink = in->nlink; - stx->stx_mask |= CEPH_STATX_NLINK; - } - - if (mask & CEPH_CAP_FILE_SHARED) { - - in->atime.to_timespec(&stx->stx_atime); - in->mtime.to_timespec(&stx->stx_mtime); - - if (in->is_dir()) { - if (cct->_conf->client_dirsize_rbytes) - stx->stx_size = in->rstat.rbytes; - else - stx->stx_size = in->dirstat.size(); - stx->stx_blocks = 1; - } else { - stx->stx_size = in->size; - stx->stx_blocks = (in->size + 511) >> 9; - } - stx->stx_mask |= (CEPH_STATX_ATIME|CEPH_STATX_MTIME| - CEPH_STATX_SIZE|CEPH_STATX_BLOCKS); - } - - /* Change time and change_attr both require all shared caps to view */ - if ((mask & CEPH_STAT_CAP_INODE_ALL) == CEPH_STAT_CAP_INODE_ALL) { - stx->stx_version = in->change_attr; - if (in->ctime > in->mtime) - in->ctime.to_timespec(&stx->stx_ctime); - else - in->mtime.to_timespec(&stx->stx_ctime); - stx->stx_mask |= (CEPH_STATX_CTIME|CEPH_STATX_VERSION); - } - -} - -void Client::touch_dn(Dentry *dn) -{ - lru.lru_touch(dn); -} - -int Client::chmod(const char *relpath, mode_t mode, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "chmod" << std::endl; - tout(cct) << relpath << std::endl; - tout(cct) << mode << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - int r = path_walk(path, &in, perms); - if (r < 0) - return r; - struct stat attr; - attr.st_mode = mode; - return _setattr(in, &attr, CEPH_SETATTR_MODE, perms); -} - -int Client::fchmod(int fd, mode_t mode, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "fchmod" << std::endl; - tout(cct) << fd << std::endl; - tout(cct) << mode << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; -#if defined(__linux__) && defined(O_PATH) - if (f->flags & O_PATH) - return -EBADF; -#endif - struct stat attr; - attr.st_mode = mode; - return _setattr(f->inode, &attr, CEPH_SETATTR_MODE, perms); -} - -int Client::lchmod(const char *relpath, mode_t mode, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "lchmod" << std::endl; - tout(cct) << relpath << std::endl; - tout(cct) << mode << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - // don't follow symlinks - int r = path_walk(path, &in, perms, false); - if (r < 0) - return r; - struct stat attr; - attr.st_mode = mode; - return _setattr(in, &attr, CEPH_SETATTR_MODE, perms); -} - -int Client::chown(const char *relpath, uid_t new_uid, gid_t new_gid, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "chown" << std::endl; - tout(cct) << relpath << std::endl; - tout(cct) << new_uid << std::endl; - tout(cct) << new_gid << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - int r = path_walk(path, &in, perms); - if (r < 0) - return r; - struct stat attr; - attr.st_uid = new_uid; - attr.st_gid = new_gid; - return _setattr(in, &attr, CEPH_SETATTR_UID|CEPH_SETATTR_GID, perms); -} - -int Client::fchown(int fd, uid_t new_uid, gid_t new_gid, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "fchown" << std::endl; - tout(cct) << fd << std::endl; - tout(cct) << new_uid << std::endl; - tout(cct) << new_gid << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; -#if defined(__linux__) && defined(O_PATH) - if (f->flags & O_PATH) - return -EBADF; -#endif - struct stat attr; - attr.st_uid = new_uid; - attr.st_gid = new_gid; - int mask = 0; - if (new_uid != static_cast(-1)) mask |= CEPH_SETATTR_UID; - if (new_gid != static_cast(-1)) mask |= CEPH_SETATTR_GID; - return _setattr(f->inode, &attr, mask, perms); -} - -int Client::lchown(const char *relpath, uid_t new_uid, gid_t new_gid, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "lchown" << std::endl; - tout(cct) << relpath << std::endl; - tout(cct) << new_uid << std::endl; - tout(cct) << new_gid << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - // don't follow symlinks - int r = path_walk(path, &in, perms, false); - if (r < 0) - return r; - struct stat attr; - attr.st_uid = new_uid; - attr.st_gid = new_gid; - int mask = 0; - if (new_uid != static_cast(-1)) mask |= CEPH_SETATTR_UID; - if (new_gid != static_cast(-1)) mask |= CEPH_SETATTR_GID; - return _setattr(in, &attr, mask, perms); -} - -int Client::utime(const char *relpath, struct utimbuf *buf, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "utime" << std::endl; - tout(cct) << relpath << std::endl; - tout(cct) << buf->modtime << std::endl; - tout(cct) << buf->actime << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - int r = path_walk(path, &in, perms); - if (r < 0) - return r; - struct stat attr; - stat_set_mtime_sec(&attr, buf->modtime); - stat_set_mtime_nsec(&attr, 0); - stat_set_atime_sec(&attr, buf->actime); - stat_set_atime_nsec(&attr, 0); - return _setattr(in, &attr, CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME, perms); -} - -int Client::lutime(const char *relpath, struct utimbuf *buf, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "lutime" << std::endl; - tout(cct) << relpath << std::endl; - tout(cct) << buf->modtime << std::endl; - tout(cct) << buf->actime << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - // don't follow symlinks - int r = path_walk(path, &in, perms, false); - if (r < 0) - return r; - struct stat attr; - stat_set_mtime_sec(&attr, buf->modtime); - stat_set_mtime_nsec(&attr, 0); - stat_set_atime_sec(&attr, buf->actime); - stat_set_atime_nsec(&attr, 0); - return _setattr(in, &attr, CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME, perms); -} - -int Client::flock(int fd, int operation, uint64_t owner) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "flock" << std::endl; - tout(cct) << fd << std::endl; - tout(cct) << operation << std::endl; - tout(cct) << owner << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - - return _flock(f, operation, owner); -} - -int Client::opendir(const char *relpath, dir_result_t **dirpp, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "opendir" << std::endl; - tout(cct) << relpath << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - int r = path_walk(path, &in, perms, true); - if (r < 0) - return r; - if (cct->_conf->client_permissions) { - int r = may_open(in.get(), O_RDONLY, perms); - if (r < 0) - return r; - } - r = _opendir(in.get(), dirpp, perms); - /* if ENOTDIR, dirpp will be an uninitialized point and it's very dangerous to access its value */ - if (r != -ENOTDIR) - tout(cct) << (unsigned long)*dirpp << std::endl; - return r; -} - -int Client::_opendir(Inode *in, dir_result_t **dirpp, const UserPerm& perms) -{ - if (!in->is_dir()) - return -ENOTDIR; - *dirpp = new dir_result_t(in, perms); - opened_dirs.insert(*dirpp); - ldout(cct, 3) << "_opendir(" << in->ino << ") = " << 0 << " (" << *dirpp << ")" << dendl; - return 0; -} - - -int Client::closedir(dir_result_t *dir) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "closedir" << std::endl; - tout(cct) << (unsigned long)dir << std::endl; - - ldout(cct, 3) << "closedir(" << dir << ") = 0" << dendl; - _closedir(dir); - return 0; -} - -void Client::_closedir(dir_result_t *dirp) -{ - ldout(cct, 10) << "_closedir(" << dirp << ")" << dendl; - if (dirp->inode) { - ldout(cct, 10) << "_closedir detaching inode " << dirp->inode << dendl; - dirp->inode.reset(); - } - _readdir_drop_dirp_buffer(dirp); - opened_dirs.erase(dirp); - delete dirp; -} - -void Client::rewinddir(dir_result_t *dirp) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "rewinddir(" << dirp << ")" << dendl; - - if (unmounting) - return; - - dir_result_t *d = static_cast(dirp); - _readdir_drop_dirp_buffer(d); - d->reset(); -} - -loff_t Client::telldir(dir_result_t *dirp) -{ - dir_result_t *d = static_cast(dirp); - ldout(cct, 3) << "telldir(" << dirp << ") = " << d->offset << dendl; - return d->offset; -} - -void Client::seekdir(dir_result_t *dirp, loff_t offset) -{ - Mutex::Locker lock(client_lock); - - ldout(cct, 3) << "seekdir(" << dirp << ", " << offset << ")" << dendl; - - if (unmounting) - return; - - if (offset == dirp->offset) - return; - - if (offset > dirp->offset) - dirp->release_count = 0; // bump if we do a forward seek - else - dirp->ordered_count = 0; // disable filling readdir cache - - if (dirp->hash_order()) { - if (dirp->offset > offset) { - _readdir_drop_dirp_buffer(dirp); - dirp->reset(); - } - } else { - if (offset == 0 || - dirp->buffer_frag != frag_t(dir_result_t::fpos_high(offset)) || - dirp->offset_low() > dir_result_t::fpos_low(offset)) { - _readdir_drop_dirp_buffer(dirp); - dirp->reset(); - } - } - - dirp->offset = offset; -} - - -//struct dirent { -// ino_t d_ino; /* inode number */ -// off_t d_off; /* offset to the next dirent */ -// unsigned short d_reclen; /* length of this record */ -// unsigned char d_type; /* type of file */ -// char d_name[256]; /* filename */ -//}; -void Client::fill_dirent(struct dirent *de, const char *name, int type, uint64_t ino, loff_t next_off) -{ - strncpy(de->d_name, name, 255); - de->d_name[255] = '\0'; -#ifndef __CYGWIN__ - de->d_ino = ino; -#if !defined(DARWIN) && !defined(__FreeBSD__) - de->d_off = next_off; -#endif - de->d_reclen = 1; - de->d_type = IFTODT(type); - ldout(cct, 10) << "fill_dirent '" << de->d_name << "' -> " << inodeno_t(de->d_ino) - << " type " << (int)de->d_type << " w/ next_off " << hex << next_off << dec << dendl; -#endif -} - -void Client::_readdir_next_frag(dir_result_t *dirp) -{ - frag_t fg = dirp->buffer_frag; - - if (fg.is_rightmost()) { - ldout(cct, 10) << "_readdir_next_frag advance from " << fg << " to END" << dendl; - dirp->set_end(); - return; - } - - // advance - fg = fg.next(); - ldout(cct, 10) << "_readdir_next_frag advance from " << dirp->buffer_frag << " to " << fg << dendl; - - if (dirp->hash_order()) { - // keep last_name - int64_t new_offset = dir_result_t::make_fpos(fg.value(), 2, true); - if (dirp->offset < new_offset) // don't decrease offset - dirp->offset = new_offset; - } else { - dirp->last_name.clear(); - dirp->offset = dir_result_t::make_fpos(fg, 2, false); - _readdir_rechoose_frag(dirp); - } -} - -void Client::_readdir_rechoose_frag(dir_result_t *dirp) -{ - assert(dirp->inode); - - if (dirp->hash_order()) - return; - - frag_t cur = frag_t(dirp->offset_high()); - frag_t fg = dirp->inode->dirfragtree[cur.value()]; - if (fg != cur) { - ldout(cct, 10) << "_readdir_rechoose_frag frag " << cur << " maps to " << fg << dendl; - dirp->offset = dir_result_t::make_fpos(fg, 2, false); - dirp->last_name.clear(); - dirp->next_offset = 2; - } -} - -void Client::_readdir_drop_dirp_buffer(dir_result_t *dirp) -{ - ldout(cct, 10) << "_readdir_drop_dirp_buffer " << dirp << dendl; - dirp->buffer.clear(); -} - -int Client::_readdir_get_frag(dir_result_t *dirp) -{ - assert(dirp); - assert(dirp->inode); - - // get the current frag. - frag_t fg; - if (dirp->hash_order()) - fg = dirp->inode->dirfragtree[dirp->offset_high()]; - else - fg = frag_t(dirp->offset_high()); - - ldout(cct, 10) << "_readdir_get_frag " << dirp << " on " << dirp->inode->ino << " fg " << fg - << " offset " << hex << dirp->offset << dec << dendl; - - int op = CEPH_MDS_OP_READDIR; - if (dirp->inode && dirp->inode->snapid == CEPH_SNAPDIR) - op = CEPH_MDS_OP_LSSNAP; - - InodeRef& diri = dirp->inode; - - MetaRequest *req = new MetaRequest(op); - filepath path; - diri->make_nosnap_relative_path(path); - req->set_filepath(path); - req->set_inode(diri.get()); - req->head.args.readdir.frag = fg; - req->head.args.readdir.flags = CEPH_READDIR_REPLY_BITFLAGS; - if (dirp->last_name.length()) { - req->path2.set_path(dirp->last_name.c_str()); - } else if (dirp->hash_order()) { - req->head.args.readdir.offset_hash = dirp->offset_high(); - } - req->dirp = dirp; - - bufferlist dirbl; - int res = make_request(req, dirp->perms, NULL, NULL, -1, &dirbl); - - if (res == -EAGAIN) { - ldout(cct, 10) << "_readdir_get_frag got EAGAIN, retrying" << dendl; - _readdir_rechoose_frag(dirp); - return _readdir_get_frag(dirp); - } - - if (res == 0) { - ldout(cct, 10) << "_readdir_get_frag " << dirp << " got frag " << dirp->buffer_frag - << " size " << dirp->buffer.size() << dendl; - } else { - ldout(cct, 10) << "_readdir_get_frag got error " << res << ", setting end flag" << dendl; - dirp->set_end(); - } - - return res; -} - -struct dentry_off_lt { - bool operator()(const Dentry* dn, int64_t off) const { - return dir_result_t::fpos_cmp(dn->offset, off) < 0; - } -}; - -int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p, - int caps, bool getref) -{ - assert(client_lock.is_locked()); - ldout(cct, 10) << "_readdir_cache_cb " << dirp << " on " << dirp->inode->ino - << " last_name " << dirp->last_name << " offset " << hex << dirp->offset << dec - << dendl; - Dir *dir = dirp->inode->dir; - - if (!dir) { - ldout(cct, 10) << " dir is empty" << dendl; - dirp->set_end(); - return 0; - } - - vector::iterator pd = std::lower_bound(dir->readdir_cache.begin(), - dir->readdir_cache.end(), - dirp->offset, dentry_off_lt()); - - string dn_name; - while (true) { - if (!dirp->inode->is_complete_and_ordered()) - return -EAGAIN; - if (pd == dir->readdir_cache.end()) - break; - Dentry *dn = *pd; - if (dn->inode == NULL) { - ldout(cct, 15) << " skipping null '" << dn->name << "'" << dendl; - ++pd; - continue; - } - if (dn->cap_shared_gen != dir->parent_inode->shared_gen) { - ldout(cct, 15) << " skipping mismatch shared gen '" << dn->name << "'" << dendl; - ++pd; - continue; - } - - int r = _getattr(dn->inode, caps, dirp->perms); - if (r < 0) - return r; - - struct ceph_statx stx; - struct dirent de; - fill_statx(dn->inode, caps, &stx); - - uint64_t next_off = dn->offset + 1; - ++pd; - if (pd == dir->readdir_cache.end()) - next_off = dir_result_t::END; - - Inode *in = NULL; - fill_dirent(&de, dn->name.c_str(), stx.stx_mode, stx.stx_ino, next_off); - if (getref) { - in = dn->inode.get(); - _ll_get(in); - } - - dn_name = dn->name; // fill in name while we have lock - - client_lock.Unlock(); - r = cb(p, &de, &stx, next_off, in); // _next_ offset - client_lock.Lock(); - ldout(cct, 15) << " de " << de.d_name << " off " << hex << dn->offset << dec - << " = " << r << dendl; - if (r < 0) { - return r; - } - - dirp->offset = next_off; - if (dirp->at_end()) - dirp->next_offset = 2; - else - dirp->next_offset = dirp->offset_low(); - dirp->last_name = dn_name; // we successfully returned this one; update! - if (r > 0) - return r; - } - - ldout(cct, 10) << "_readdir_cache_cb " << dirp << " on " << dirp->inode->ino << " at end" << dendl; - dirp->set_end(); - return 0; -} - -int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p, - unsigned want, unsigned flags, bool getref) -{ - int caps = statx_to_mask(flags, want); - - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - dir_result_t *dirp = static_cast(d); - - ldout(cct, 10) << "readdir_r_cb " << *dirp->inode << " offset " << hex << dirp->offset - << dec << " at_end=" << dirp->at_end() - << " hash_order=" << dirp->hash_order() << dendl; - - struct dirent de; - struct ceph_statx stx; - memset(&de, 0, sizeof(de)); - memset(&stx, 0, sizeof(stx)); - - InodeRef& diri = dirp->inode; - - if (dirp->at_end()) - return 0; - - if (dirp->offset == 0) { - ldout(cct, 15) << " including ." << dendl; - assert(diri->dn_set.size() < 2); // can't have multiple hard-links to a dir - uint64_t next_off = 1; - - int r; - r = _getattr(diri, caps, dirp->perms); - if (r < 0) - return r; - - fill_statx(diri, caps, &stx); - fill_dirent(&de, ".", S_IFDIR, stx.stx_ino, next_off); - - Inode *inode = NULL; - if (getref) { - inode = diri.get(); - _ll_get(inode); - } - - client_lock.Unlock(); - r = cb(p, &de, &stx, next_off, inode); - client_lock.Lock(); - if (r < 0) - return r; - - dirp->offset = next_off; - if (r > 0) - return r; - } - if (dirp->offset == 1) { - ldout(cct, 15) << " including .." << dendl; - uint64_t next_off = 2; - InodeRef in; - if (diri->dn_set.empty()) - in = diri; - else - in = diri->get_first_parent()->inode; - - int r; - r = _getattr(diri, caps, dirp->perms); - if (r < 0) - return r; - - fill_statx(in, caps, &stx); - fill_dirent(&de, "..", S_IFDIR, stx.stx_ino, next_off); - - Inode *inode = NULL; - if (getref) { - inode = in.get(); - _ll_get(inode); - } - - client_lock.Unlock(); - r = cb(p, &de, &stx, next_off, inode); - client_lock.Lock(); - if (r < 0) - return r; - - dirp->offset = next_off; - if (r > 0) - return r; - } - - // can we read from our cache? - ldout(cct, 10) << "offset " << hex << dirp->offset << dec - << " snapid " << dirp->inode->snapid << " (complete && ordered) " - << dirp->inode->is_complete_and_ordered() - << " issued " << ccap_string(dirp->inode->caps_issued()) - << dendl; - if (dirp->inode->snapid != CEPH_SNAPDIR && - dirp->inode->is_complete_and_ordered() && - dirp->inode->caps_issued_mask(CEPH_CAP_FILE_SHARED)) { - int err = _readdir_cache_cb(dirp, cb, p, caps, getref); - if (err != -EAGAIN) - return err; - } - - while (1) { - if (dirp->at_end()) - return 0; - - bool check_caps = true; - if (!dirp->is_cached()) { - int r = _readdir_get_frag(dirp); - if (r) - return r; - // _readdir_get_frag () may updates dirp->offset if the replied dirfrag is - // different than the requested one. (our dirfragtree was outdated) - check_caps = false; - } - frag_t fg = dirp->buffer_frag; - - ldout(cct, 10) << "frag " << fg << " buffer size " << dirp->buffer.size() - << " offset " << hex << dirp->offset << dendl; - - for (auto it = std::lower_bound(dirp->buffer.begin(), dirp->buffer.end(), - dirp->offset, dir_result_t::dentry_off_lt()); - it != dirp->buffer.end(); - ++it) { - dir_result_t::dentry &entry = *it; - - uint64_t next_off = entry.offset + 1; - - int r; - if (check_caps) { - r = _getattr(entry.inode, caps, dirp->perms); - if (r < 0) - return r; - } - - fill_statx(entry.inode, caps, &stx); - fill_dirent(&de, entry.name.c_str(), stx.stx_mode, stx.stx_ino, next_off); - - Inode *inode = NULL; - if (getref) { - inode = entry.inode.get(); - _ll_get(inode); - } - - client_lock.Unlock(); - r = cb(p, &de, &stx, next_off, inode); // _next_ offset - client_lock.Lock(); - - ldout(cct, 15) << " de " << de.d_name << " off " << hex << next_off - 1 << dec - << " = " << r << dendl; - if (r < 0) - return r; - - dirp->offset = next_off; - if (r > 0) - return r; - } - - if (dirp->next_offset > 2) { - ldout(cct, 10) << " fetching next chunk of this frag" << dendl; - _readdir_drop_dirp_buffer(dirp); - continue; // more! - } - - if (!fg.is_rightmost()) { - // next frag! - _readdir_next_frag(dirp); - continue; - } - - if (diri->shared_gen == dirp->start_shared_gen && - diri->dir_release_count == dirp->release_count) { - if (diri->dir_ordered_count == dirp->ordered_count) { - ldout(cct, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on " << *diri << dendl; - if (diri->dir) { - assert(diri->dir->readdir_cache.size() >= dirp->cache_index); - diri->dir->readdir_cache.resize(dirp->cache_index); - } - diri->flags |= I_COMPLETE | I_DIR_ORDERED; - } else { - ldout(cct, 10) << " marking I_COMPLETE on " << *diri << dendl; - diri->flags |= I_COMPLETE; - } - } - - dirp->set_end(); - return 0; - } - ceph_abort(); - return 0; -} - - -int Client::readdir_r(dir_result_t *d, struct dirent *de) -{ - return readdirplus_r(d, de, 0, 0, 0, NULL); -} - -/* - * readdirplus_r - * - * returns - * 1 if we got a dirent - * 0 for end of directory - * <0 on error - */ - -struct single_readdir { - struct dirent *de; - struct ceph_statx *stx; - Inode *inode; - bool full; -}; - -static int _readdir_single_dirent_cb(void *p, struct dirent *de, - struct ceph_statx *stx, off_t off, - Inode *in) -{ - single_readdir *c = static_cast(p); - - if (c->full) - return -1; // already filled this dirent - - *c->de = *de; - if (c->stx) - *c->stx = *stx; - c->inode = in; - c->full = true; - return 1; -} - -struct dirent *Client::readdir(dir_result_t *d) -{ - int ret; - static struct dirent de; - single_readdir sr; - sr.de = &de; - sr.stx = NULL; - sr.inode = NULL; - sr.full = false; - - // our callback fills the dirent and sets sr.full=true on first - // call, and returns -1 the second time around. - ret = readdir_r_cb(d, _readdir_single_dirent_cb, (void *)&sr); - if (ret < -1) { - errno = -ret; // this sucks. - return (dirent *) NULL; - } - if (sr.full) { - return &de; - } - return (dirent *) NULL; -} - -int Client::readdirplus_r(dir_result_t *d, struct dirent *de, - struct ceph_statx *stx, unsigned want, - unsigned flags, Inode **out) -{ - single_readdir sr; - sr.de = de; - sr.stx = stx; - sr.inode = NULL; - sr.full = false; - - // our callback fills the dirent and sets sr.full=true on first - // call, and returns -1 the second time around. - int r = readdir_r_cb(d, _readdir_single_dirent_cb, (void *)&sr, want, flags, out); - if (r < -1) - return r; - if (out) - *out = sr.inode; - if (sr.full) - return 1; - return 0; -} - - -/* getdents */ -struct getdents_result { - char *buf; - int buflen; - int pos; - bool fullent; -}; - -static int _readdir_getdent_cb(void *p, struct dirent *de, - struct ceph_statx *stx, off_t off, Inode *in) -{ - struct getdents_result *c = static_cast(p); - - int dlen; - if (c->fullent) - dlen = sizeof(*de); - else - dlen = strlen(de->d_name) + 1; - - if (c->pos + dlen > c->buflen) - return -1; // doesn't fit - - if (c->fullent) { - memcpy(c->buf + c->pos, de, sizeof(*de)); - } else { - memcpy(c->buf + c->pos, de->d_name, dlen); - } - c->pos += dlen; - return 0; -} - -int Client::_getdents(dir_result_t *dir, char *buf, int buflen, bool fullent) -{ - getdents_result gr; - gr.buf = buf; - gr.buflen = buflen; - gr.fullent = fullent; - gr.pos = 0; - - int r = readdir_r_cb(dir, _readdir_getdent_cb, (void *)&gr); - - if (r < 0) { // some error - if (r == -1) { // buffer ran out of space - if (gr.pos) { // but we got some entries already! - return gr.pos; - } // or we need a larger buffer - return -ERANGE; - } else { // actual error, return it - return r; - } - } - return gr.pos; -} - - -/* getdir */ -struct getdir_result { - list *contents; - int num; -}; - -static int _getdir_cb(void *p, struct dirent *de, struct ceph_statx *stx, off_t off, Inode *in) -{ - getdir_result *r = static_cast(p); - - r->contents->push_back(de->d_name); - r->num++; - return 0; -} - -int Client::getdir(const char *relpath, list& contents, - const UserPerm& perms) -{ - ldout(cct, 3) << "getdir(" << relpath << ")" << dendl; - { - Mutex::Locker lock(client_lock); - tout(cct) << "getdir" << std::endl; - tout(cct) << relpath << std::endl; - } - - dir_result_t *d; - int r = opendir(relpath, &d, perms); - if (r < 0) - return r; - - getdir_result gr; - gr.contents = &contents; - gr.num = 0; - r = readdir_r_cb(d, _getdir_cb, (void *)&gr); - - closedir(d); - - if (r < 0) - return r; - return gr.num; -} - - -/****** file i/o **********/ -int Client::open(const char *relpath, int flags, const UserPerm& perms, - mode_t mode, int stripe_unit, int stripe_count, - int object_size, const char *data_pool) -{ - ldout(cct, 3) << "open enter(" << relpath << ", " << ceph_flags_sys2wire(flags) << "," << mode << ")" << dendl; - Mutex::Locker lock(client_lock); - tout(cct) << "open" << std::endl; - tout(cct) << relpath << std::endl; - tout(cct) << ceph_flags_sys2wire(flags) << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *fh = NULL; - -#if defined(__linux__) && defined(O_PATH) - /* When the O_PATH is being specified, others flags than O_DIRECTORY - * and O_NOFOLLOW are ignored. Please refer do_entry_open() function - * in kernel (fs/open.c). */ - if (flags & O_PATH) - flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; -#endif - - filepath path(relpath); - InodeRef in; - bool created = false; - /* O_CREATE with O_EXCL enforces O_NOFOLLOW. */ - bool followsym = !((flags & O_NOFOLLOW) || ((flags & O_CREAT) && (flags & O_EXCL))); - int r = path_walk(path, &in, perms, followsym, ceph_caps_for_mode(mode)); - - if (r == 0 && (flags & O_CREAT) && (flags & O_EXCL)) - return -EEXIST; - -#if defined(__linux__) && defined(O_PATH) - if (r == 0 && in->is_symlink() && (flags & O_NOFOLLOW) && !(flags & O_PATH)) -#else - if (r == 0 && in->is_symlink() && (flags & O_NOFOLLOW)) -#endif - return -ELOOP; - - if (r == -ENOENT && (flags & O_CREAT)) { - filepath dirpath = path; - string dname = dirpath.last_dentry(); - dirpath.pop_dentry(); - InodeRef dir; - r = path_walk(dirpath, &dir, perms, true, - cct->_conf->client_permissions ? CEPH_CAP_AUTH_SHARED : 0); - if (r < 0) - goto out; - if (cct->_conf->client_permissions) { - r = may_create(dir.get(), perms); - if (r < 0) - goto out; - } - r = _create(dir.get(), dname.c_str(), flags, mode, &in, &fh, stripe_unit, - stripe_count, object_size, data_pool, &created, perms); - } - if (r < 0) - goto out; - - if (!created) { - // posix says we can only check permissions of existing files - if (cct->_conf->client_permissions) { - r = may_open(in.get(), flags, perms); - if (r < 0) - goto out; - } - } - - if (!fh) - r = _open(in.get(), flags, mode, &fh, perms); - if (r >= 0) { - // allocate a integer file descriptor - assert(fh); - r = get_fd(); - assert(fd_map.count(r) == 0); - fd_map[r] = fh; - } - - out: - tout(cct) << r << std::endl; - ldout(cct, 3) << "open exit(" << path << ", " << ceph_flags_sys2wire(flags) << ") = " << r << dendl; - return r; -} - -int Client::open(const char *relpath, int flags, const UserPerm& perms, mode_t mode) -{ - /* Use default file striping parameters */ - return open(relpath, flags, perms, mode, 0, 0, 0, NULL); -} - -int Client::lookup_hash(inodeno_t ino, inodeno_t dirino, const char *name, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "lookup_hash enter(" << ino << ", #" << dirino << "/" << name << ")" << dendl; - - if (unmounting) - return -ENOTCONN; - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPHASH); - filepath path(ino); - req->set_filepath(path); - - uint32_t h = ceph_str_hash(CEPH_STR_HASH_RJENKINS, name, strlen(name)); - char f[30]; - sprintf(f, "%u", h); - filepath path2(dirino); - path2.push_dentry(string(f)); - req->set_filepath2(path2); - - int r = make_request(req, perms, NULL, NULL, - rand() % mdsmap->get_num_in_mds()); - ldout(cct, 3) << "lookup_hash exit(" << ino << ", #" << dirino << "/" << name << ") = " << r << dendl; - return r; -} - - -/** - * Load inode into local cache. - * - * If inode pointer is non-NULL, and take a reference on - * the resulting Inode object in one operation, so that caller - * can safely assume inode will still be there after return. - */ -int Client::lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "lookup_ino enter(" << ino << ")" << dendl; - - if (unmounting) - return -ENOTCONN; - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPINO); - filepath path(ino); - req->set_filepath(path); - - int r = make_request(req, perms, NULL, NULL, rand() % mdsmap->get_num_in_mds()); - if (r == 0 && inode != NULL) { - vinodeno_t vino(ino, CEPH_NOSNAP); - unordered_map::iterator p = inode_map.find(vino); - assert(p != inode_map.end()); - *inode = p->second; - _ll_get(*inode); - } - ldout(cct, 3) << "lookup_ino exit(" << ino << ") = " << r << dendl; - return r; -} - - - -/** - * Find the parent inode of `ino` and insert it into - * our cache. Conditionally also set `parent` to a referenced - * Inode* if caller provides non-NULL value. - */ -int Client::lookup_parent(Inode *ino, const UserPerm& perms, Inode **parent) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "lookup_parent enter(" << ino->ino << ")" << dendl; - - if (unmounting) - return -ENOTCONN; - - if (!ino->dn_set.empty()) { - // if we exposed the parent here, we'd need to check permissions, - // but right now we just rely on the MDS doing so in make_request - ldout(cct, 3) << "lookup_parent dentry already present" << dendl; - return 0; - } - - if (ino->is_root()) { - *parent = NULL; - ldout(cct, 3) << "ino is root, no parent" << dendl; - return -EINVAL; - } - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPPARENT); - filepath path(ino->ino); - req->set_filepath(path); - - InodeRef target; - int r = make_request(req, perms, &target, NULL, rand() % mdsmap->get_num_in_mds()); - // Give caller a reference to the parent ino if they provided a pointer. - if (parent != NULL) { - if (r == 0) { - *parent = target.get(); - _ll_get(*parent); - ldout(cct, 3) << "lookup_parent found parent " << (*parent)->ino << dendl; - } else { - *parent = NULL; - } - } - ldout(cct, 3) << "lookup_parent exit(" << ino->ino << ") = " << r << dendl; - return r; -} - - -/** - * Populate the parent dentry for `ino`, provided it is - * a child of `parent`. - */ -int Client::lookup_name(Inode *ino, Inode *parent, const UserPerm& perms) -{ - assert(parent->is_dir()); - - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "lookup_name enter(" << ino->ino << ")" << dendl; - - if (unmounting) - return -ENOTCONN; - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPNAME); - req->set_filepath2(filepath(parent->ino)); - req->set_filepath(filepath(ino->ino)); - req->set_inode(ino); - - int r = make_request(req, perms, NULL, NULL, rand() % mdsmap->get_num_in_mds()); - ldout(cct, 3) << "lookup_name exit(" << ino->ino << ") = " << r << dendl; - return r; -} - - - Fh *Client::_create_fh(Inode *in, int flags, int cmode, const UserPerm& perms) -{ - assert(in); - Fh *f = new Fh(in); - f->mode = cmode; - f->flags = flags; - - // inode - f->actor_perms = perms; - - ldout(cct, 10) << "_create_fh " << in->ino << " mode " << cmode << dendl; - - if (in->snapid != CEPH_NOSNAP) { - in->snap_cap_refs++; - ldout(cct, 5) << "open success, fh is " << f << " combined IMMUTABLE SNAP caps " - << ccap_string(in->caps_issued()) << dendl; - } - - const md_config_t *conf = cct->_conf; - f->readahead.set_trigger_requests(1); - f->readahead.set_min_readahead_size(conf->client_readahead_min); - uint64_t max_readahead = Readahead::NO_LIMIT; - if (conf->client_readahead_max_bytes) { - max_readahead = MIN(max_readahead, (uint64_t)conf->client_readahead_max_bytes); - } - if (conf->client_readahead_max_periods) { - max_readahead = MIN(max_readahead, in->layout.get_period()*(uint64_t)conf->client_readahead_max_periods); - } - f->readahead.set_max_readahead_size(max_readahead); - vector alignments; - alignments.push_back(in->layout.get_period()); - alignments.push_back(in->layout.stripe_unit); - f->readahead.set_alignments(alignments); - - return f; -} - -int Client::_release_fh(Fh *f) -{ - //ldout(cct, 3) << "op: client->close(open_files[ " << fh << " ]);" << dendl; - //ldout(cct, 3) << "op: open_files.erase( " << fh << " );" << dendl; - Inode *in = f->inode.get(); - ldout(cct, 5) << "_release_fh " << f << " mode " << f->mode << " on " << *in << dendl; - - if (in->snapid == CEPH_NOSNAP) { - if (in->put_open_ref(f->mode)) { - _flush(in, new C_Client_FlushComplete(this, in)); - check_caps(in, 0); - } - } else { - assert(in->snap_cap_refs > 0); - in->snap_cap_refs--; - } - - _release_filelocks(f); - - // Finally, read any async err (i.e. from flushes) - int err = f->take_async_err(); - if (err != 0) { - ldout(cct, 1) << "_release_fh " << f << " on inode " << *in << " caught async_err = " - << cpp_strerror(err) << dendl; - } else { - ldout(cct, 10) << "_release_fh " << f << " on inode " << *in << " no async_err state" << dendl; - } - - _put_fh(f); - - return err; -} - -void Client::_put_fh(Fh *f) -{ - int left = f->put(); - if (!left) { - delete f; - } -} - -int Client::_open(Inode *in, int flags, mode_t mode, Fh **fhp, - const UserPerm& perms) -{ - if (in->snapid != CEPH_NOSNAP && - (flags & (O_WRONLY | O_RDWR | O_CREAT | O_TRUNC | O_APPEND))) { - return -EROFS; - } - - // use normalized flags to generate cmode - int cmode = ceph_flags_to_mode(ceph_flags_sys2wire(flags)); - if (cmode < 0) - return -EINVAL; - int want = ceph_caps_for_mode(cmode); - int result = 0; - - in->get_open_ref(cmode); // make note of pending open, since it effects _wanted_ caps. - - if ((flags & O_TRUNC) == 0 && - in->caps_issued_mask(want)) { - // update wanted? - check_caps(in, CHECK_CAPS_NODELAY); - } else { - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_OPEN); - filepath path; - in->make_nosnap_relative_path(path); - req->set_filepath(path); - req->head.args.open.flags = ceph_flags_sys2wire(flags & ~O_CREAT); - req->head.args.open.mode = mode; - req->head.args.open.pool = -1; - if (cct->_conf->client_debug_getattr_caps) - req->head.args.open.mask = DEBUG_GETATTR_CAPS; - else - req->head.args.open.mask = 0; - req->head.args.open.old_size = in->size; // for O_TRUNC - req->set_inode(in); - result = make_request(req, perms); - } - - // success? - if (result >= 0) { - if (fhp) - *fhp = _create_fh(in, flags, cmode, perms); - } else { - in->put_open_ref(cmode); - } - - trim_cache(); - - return result; -} - -int Client::_renew_caps(Inode *in) -{ - int wanted = in->caps_file_wanted(); - if (in->is_any_caps() && - ((wanted & CEPH_CAP_ANY_WR) == 0 || in->auth_cap)) { - check_caps(in, CHECK_CAPS_NODELAY); - return 0; - } - - int flags = 0; - if ((wanted & CEPH_CAP_FILE_RD) && (wanted & CEPH_CAP_FILE_WR)) - flags = O_RDWR; - else if (wanted & CEPH_CAP_FILE_RD) - flags = O_RDONLY; - else if (wanted & CEPH_CAP_FILE_WR) - flags = O_WRONLY; - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_OPEN); - filepath path; - in->make_nosnap_relative_path(path); - req->set_filepath(path); - req->head.args.open.flags = flags; - req->head.args.open.pool = -1; - if (cct->_conf->client_debug_getattr_caps) - req->head.args.open.mask = DEBUG_GETATTR_CAPS; - else - req->head.args.open.mask = 0; - req->set_inode(in); - - // duplicate in case Cap goes away; not sure if that race is a concern? - const UserPerm *pperm = in->get_best_perms(); - UserPerm perms; - if (pperm != NULL) - perms = *pperm; - int ret = make_request(req, perms); - return ret; -} - -int Client::close(int fd) -{ - ldout(cct, 3) << "close enter(" << fd << ")" << dendl; - Mutex::Locker lock(client_lock); - tout(cct) << "close" << std::endl; - tout(cct) << fd << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *fh = get_filehandle(fd); - if (!fh) - return -EBADF; - int err = _release_fh(fh); - fd_map.erase(fd); - put_fd(fd); - ldout(cct, 3) << "close exit(" << fd << ")" << dendl; - return err; -} - - -// ------------ -// read, write - -loff_t Client::lseek(int fd, loff_t offset, int whence) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "lseek" << std::endl; - tout(cct) << fd << std::endl; - tout(cct) << offset << std::endl; - tout(cct) << whence << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; -#if defined(__linux__) && defined(O_PATH) - if (f->flags & O_PATH) - return -EBADF; -#endif - return _lseek(f, offset, whence); -} - -loff_t Client::_lseek(Fh *f, loff_t offset, int whence) -{ - Inode *in = f->inode.get(); - int r; - - switch (whence) { - case SEEK_SET: - f->pos = offset; - break; - - case SEEK_CUR: - f->pos += offset; - break; - - case SEEK_END: - r = _getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms); - if (r < 0) - return r; - f->pos = in->size + offset; - break; - - default: - ceph_abort(); - } - - ldout(cct, 3) << "_lseek(" << f << ", " << offset << ", " << whence << ") = " << f->pos << dendl; - return f->pos; -} - - -void Client::lock_fh_pos(Fh *f) -{ - ldout(cct, 10) << "lock_fh_pos " << f << dendl; - - if (f->pos_locked || !f->pos_waiters.empty()) { - Cond cond; - f->pos_waiters.push_back(&cond); - ldout(cct, 10) << "lock_fh_pos BLOCKING on " << f << dendl; - while (f->pos_locked || f->pos_waiters.front() != &cond) - cond.Wait(client_lock); - ldout(cct, 10) << "lock_fh_pos UNBLOCKING on " << f << dendl; - assert(f->pos_waiters.front() == &cond); - f->pos_waiters.pop_front(); - } - - f->pos_locked = true; -} - -void Client::unlock_fh_pos(Fh *f) -{ - ldout(cct, 10) << "unlock_fh_pos " << f << dendl; - f->pos_locked = false; -} - -int Client::uninline_data(Inode *in, Context *onfinish) -{ - if (!in->inline_data.length()) { - onfinish->complete(0); - return 0; - } - - char oid_buf[32]; - snprintf(oid_buf, sizeof(oid_buf), "%llx.00000000", (long long unsigned)in->ino); - object_t oid = oid_buf; - - ObjectOperation create_ops; - create_ops.create(false); - - objecter->mutate(oid, - OSDMap::file_to_object_locator(in->layout), - create_ops, - in->snaprealm->get_snap_context(), - ceph::real_clock::now(), - 0, - NULL); - - bufferlist inline_version_bl; - ::encode(in->inline_version, inline_version_bl); - - ObjectOperation uninline_ops; - uninline_ops.cmpxattr("inline_version", - CEPH_OSD_CMPXATTR_OP_GT, - CEPH_OSD_CMPXATTR_MODE_U64, - inline_version_bl); - bufferlist inline_data = in->inline_data; - uninline_ops.write(0, inline_data, in->truncate_size, in->truncate_seq); - uninline_ops.setxattr("inline_version", stringify(in->inline_version)); - - objecter->mutate(oid, - OSDMap::file_to_object_locator(in->layout), - uninline_ops, - in->snaprealm->get_snap_context(), - ceph::real_clock::now(), - 0, - onfinish); - - return 0; -} - -// - -// blocking osd interface - -int Client::read(int fd, char *buf, loff_t size, loff_t offset) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "read" << std::endl; - tout(cct) << fd << std::endl; - tout(cct) << size << std::endl; - tout(cct) << offset << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; -#if defined(__linux__) && defined(O_PATH) - if (f->flags & O_PATH) - return -EBADF; -#endif - bufferlist bl; - int r = _read(f, offset, size, &bl); - ldout(cct, 3) << "read(" << fd << ", " << (void*)buf << ", " << size << ", " << offset << ") = " << r << dendl; - if (r >= 0) { - bl.copy(0, bl.length(), buf); - r = bl.length(); - } - return r; -} - -int Client::preadv(int fd, const struct iovec *iov, int iovcnt, loff_t offset) -{ - if (iovcnt < 0) - return -EINVAL; - return _preadv_pwritev(fd, iov, iovcnt, offset, false); -} - -int Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl) -{ - const md_config_t *conf = cct->_conf; - Inode *in = f->inode.get(); - - if ((f->mode & CEPH_FILE_MODE_RD) == 0) - return -EBADF; - //bool lazy = f->mode == CEPH_FILE_MODE_LAZY; - - bool movepos = false; - if (offset < 0) { - lock_fh_pos(f); - offset = f->pos; - movepos = true; - } - loff_t start_pos = offset; - - if (in->inline_version == 0) { - int r = _getattr(in, CEPH_STAT_CAP_INLINE_DATA, f->actor_perms, true); - if (r < 0) { - if (movepos) - unlock_fh_pos(f); - return r; - } - assert(in->inline_version > 0); - } - -retry: - int have; - int r = get_caps(in, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE, &have, -1); - if (r < 0) { - if (movepos) - unlock_fh_pos(f); - return r; - } - if (f->flags & O_DIRECT) - have &= ~CEPH_CAP_FILE_CACHE; - - Mutex uninline_flock("Client::_read_uninline_data flock"); - Cond uninline_cond; - bool uninline_done = false; - int uninline_ret = 0; - Context *onuninline = NULL; - - if (in->inline_version < CEPH_INLINE_NONE) { - if (!(have & CEPH_CAP_FILE_CACHE)) { - onuninline = new C_SafeCond(&uninline_flock, - &uninline_cond, - &uninline_done, - &uninline_ret); - uninline_data(in, onuninline); - } else { - uint32_t len = in->inline_data.length(); - - uint64_t endoff = offset + size; - if (endoff > in->size) - endoff = in->size; - - if (offset < len) { - if (endoff <= len) { - bl->substr_of(in->inline_data, offset, endoff - offset); - } else { - bl->substr_of(in->inline_data, offset, len - offset); - bl->append_zero(endoff - len); - } - } else if ((uint64_t)offset < endoff) { - bl->append_zero(endoff - offset); - } - - goto success; - } - } - - if (!conf->client_debug_force_sync_read && - (conf->client_oc && (have & CEPH_CAP_FILE_CACHE))) { - - if (f->flags & O_RSYNC) { - _flush_range(in, offset, size); - } - r = _read_async(f, offset, size, bl); - if (r < 0) - goto done; - } else { - if (f->flags & O_DIRECT) - _flush_range(in, offset, size); - - bool checkeof = false; - r = _read_sync(f, offset, size, bl, &checkeof); - if (r < 0) - goto done; - if (checkeof) { - offset += r; - size -= r; - - put_cap_ref(in, CEPH_CAP_FILE_RD); - have = 0; - // reverify size - r = _getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms); - if (r < 0) - goto done; - - // eof? short read. - if ((uint64_t)offset < in->size) - goto retry; - } - } - -success: - if (movepos) { - // adjust fd pos - f->pos = start_pos + bl->length(); - unlock_fh_pos(f); - } - -done: - // done! - - if (onuninline) { - client_lock.Unlock(); - uninline_flock.Lock(); - while (!uninline_done) - uninline_cond.Wait(uninline_flock); - uninline_flock.Unlock(); - client_lock.Lock(); - - if (uninline_ret >= 0 || uninline_ret == -ECANCELED) { - in->inline_data.clear(); - in->inline_version = CEPH_INLINE_NONE; - mark_caps_dirty(in, CEPH_CAP_FILE_WR); - check_caps(in, 0); - } else - r = uninline_ret; - } - - if (have) - put_cap_ref(in, CEPH_CAP_FILE_RD); - if (r < 0) { - if (movepos) - unlock_fh_pos(f); - return r; - } else - return bl->length(); -} - -Client::C_Readahead::C_Readahead(Client *c, Fh *f) : - client(c), f(f) { - f->get(); - f->readahead.inc_pending(); -} - -Client::C_Readahead::~C_Readahead() { - f->readahead.dec_pending(); - client->_put_fh(f); -} - -void Client::C_Readahead::finish(int r) { - lgeneric_subdout(client->cct, client, 20) << "client." << client->get_nodeid() << " " << "C_Readahead on " << f->inode << dendl; - client->put_cap_ref(f->inode.get(), CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE); -} - -int Client::_read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl) -{ - const md_config_t *conf = cct->_conf; - Inode *in = f->inode.get(); - - ldout(cct, 10) << "_read_async " << *in << " " << off << "~" << len << dendl; - - // trim read based on file size? - if (off >= in->size) - return 0; - if (len == 0) - return 0; - if (off + len > in->size) { - len = in->size - off; - } - - ldout(cct, 10) << " min_bytes=" << f->readahead.get_min_readahead_size() - << " max_bytes=" << f->readahead.get_max_readahead_size() - << " max_periods=" << conf->client_readahead_max_periods << dendl; - - // read (and possibly block) - int r, rvalue = 0; - Mutex flock("Client::_read_async flock"); - Cond cond; - bool done = false; - Context *onfinish = new C_SafeCond(&flock, &cond, &done, &rvalue); - r = objectcacher->file_read(&in->oset, &in->layout, in->snapid, - off, len, bl, 0, onfinish); - if (r == 0) { - get_cap_ref(in, CEPH_CAP_FILE_CACHE); - client_lock.Unlock(); - flock.Lock(); - while (!done) - cond.Wait(flock); - flock.Unlock(); - client_lock.Lock(); - put_cap_ref(in, CEPH_CAP_FILE_CACHE); - r = rvalue; - } else { - // it was cached. - delete onfinish; - } - - if(f->readahead.get_min_readahead_size() > 0) { - pair readahead_extent = f->readahead.update(off, len, in->size); - if (readahead_extent.second > 0) { - ldout(cct, 20) << "readahead " << readahead_extent.first << "~" << readahead_extent.second - << " (caller wants " << off << "~" << len << ")" << dendl; - Context *onfinish2 = new C_Readahead(this, f); - int r2 = objectcacher->file_read(&in->oset, &in->layout, in->snapid, - readahead_extent.first, readahead_extent.second, - NULL, 0, onfinish2); - if (r2 == 0) { - ldout(cct, 20) << "readahead initiated, c " << onfinish2 << dendl; - get_cap_ref(in, CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE); - } else { - ldout(cct, 20) << "readahead was no-op, already cached" << dendl; - delete onfinish2; - } - } - } - - return r; -} - -int Client::_read_sync(Fh *f, uint64_t off, uint64_t len, bufferlist *bl, - bool *checkeof) -{ - Inode *in = f->inode.get(); - uint64_t pos = off; - int left = len; - int read = 0; - - ldout(cct, 10) << "_read_sync " << *in << " " << off << "~" << len << dendl; - - Mutex flock("Client::_read_sync flock"); - Cond cond; - while (left > 0) { - int r = 0; - bool done = false; - Context *onfinish = new C_SafeCond(&flock, &cond, &done, &r); - bufferlist tbl; - - int wanted = left; - filer->read_trunc(in->ino, &in->layout, in->snapid, - pos, left, &tbl, 0, - in->truncate_size, in->truncate_seq, - onfinish); - client_lock.Unlock(); - flock.Lock(); - while (!done) - cond.Wait(flock); - flock.Unlock(); - client_lock.Lock(); - - // if we get ENOENT from OSD, assume 0 bytes returned - if (r == -ENOENT) - r = 0; - if (r < 0) - return r; - if (tbl.length()) { - r = tbl.length(); - - read += r; - pos += r; - left -= r; - bl->claim_append(tbl); - } - // short read? - if (r >= 0 && r < wanted) { - if (pos < in->size) { - // zero up to known EOF - int64_t some = in->size - pos; - if (some > left) - some = left; - bufferptr z(some); - z.zero(); - bl->push_back(z); - read += some; - pos += some; - left -= some; - if (left == 0) - return read; - } - - *checkeof = true; - return read; - } - } - return read; -} - - -/* - * we keep count of uncommitted sync writes on the inode, so that - * fsync can DDRT. - */ -void Client::_sync_write_commit(Inode *in) -{ - assert(unsafe_sync_write > 0); - unsafe_sync_write--; - - put_cap_ref(in, CEPH_CAP_FILE_BUFFER); - - ldout(cct, 15) << "sync_write_commit unsafe_sync_write = " << unsafe_sync_write << dendl; - if (unsafe_sync_write == 0 && unmounting) { - ldout(cct, 10) << "sync_write_commit -- no more unsafe writes, unmount can proceed" << dendl; - mount_cond.Signal(); - } -} - -int Client::write(int fd, const char *buf, loff_t size, loff_t offset) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "write" << std::endl; - tout(cct) << fd << std::endl; - tout(cct) << size << std::endl; - tout(cct) << offset << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *fh = get_filehandle(fd); - if (!fh) - return -EBADF; -#if defined(__linux__) && defined(O_PATH) - if (fh->flags & O_PATH) - return -EBADF; -#endif - int r = _write(fh, offset, size, buf, NULL, 0); - ldout(cct, 3) << "write(" << fd << ", \"...\", " << size << ", " << offset << ") = " << r << dendl; - return r; -} - -int Client::pwritev(int fd, const struct iovec *iov, int iovcnt, int64_t offset) -{ - if (iovcnt < 0) - return -EINVAL; - return _preadv_pwritev(fd, iov, iovcnt, offset, true); -} - -int Client::_preadv_pwritev(int fd, const struct iovec *iov, unsigned iovcnt, int64_t offset, bool write) -{ - Mutex::Locker lock(client_lock); - tout(cct) << fd << std::endl; - tout(cct) << offset << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *fh = get_filehandle(fd); - if (!fh) - return -EBADF; -#if defined(__linux__) && defined(O_PATH) - if (fh->flags & O_PATH) - return -EBADF; -#endif - loff_t totallen = 0; - for (unsigned i = 0; i < iovcnt; i++) { - totallen += iov[i].iov_len; - } - if (write) { - int w = _write(fh, offset, totallen, NULL, iov, iovcnt); - ldout(cct, 3) << "pwritev(" << fd << ", \"...\", " << totallen << ", " << offset << ") = " << w << dendl; - return w; - } else { - bufferlist bl; - int r = _read(fh, offset, totallen, &bl); - ldout(cct, 3) << "preadv(" << fd << ", " << offset << ") = " << r << dendl; - if (r <= 0) - return r; - - int bufoff = 0; - for (unsigned j = 0, resid = r; j < iovcnt && resid > 0; j++) { - /* - * This piece of code aims to handle the case that bufferlist does not have enough data - * to fill in the iov - */ - if (resid < iov[j].iov_len) { - bl.copy(bufoff, resid, (char *)iov[j].iov_base); - break; - } else { - bl.copy(bufoff, iov[j].iov_len, (char *)iov[j].iov_base); - } - resid -= iov[j].iov_len; - bufoff += iov[j].iov_len; - } - return r; - } -} - -int Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, - const struct iovec *iov, int iovcnt) -{ - if ((uint64_t)(offset+size) > mdsmap->get_max_filesize()) //too large! - return -EFBIG; - - //ldout(cct, 7) << "write fh " << fh << " size " << size << " offset " << offset << dendl; - Inode *in = f->inode.get(); - - if (objecter->osdmap_pool_full(in->layout.pool_id)) { - return -ENOSPC; - } - - assert(in->snapid == CEPH_NOSNAP); - - // was Fh opened as writeable? - if ((f->mode & CEPH_FILE_MODE_WR) == 0) - return -EBADF; - - // check quota - uint64_t endoff = offset + size; - if (endoff > in->size && is_quota_bytes_exceeded(in, endoff - in->size, - f->actor_perms)) { - return -EDQUOT; - } - - // use/adjust fd pos? - if (offset < 0) { - lock_fh_pos(f); - /* - * FIXME: this is racy in that we may block _after_ this point waiting for caps, and size may - * change out from under us. - */ - if (f->flags & O_APPEND) { - int r = _lseek(f, 0, SEEK_END); - if (r < 0) { - unlock_fh_pos(f); - return r; - } - } - offset = f->pos; - f->pos = offset+size; - unlock_fh_pos(f); - } - - //bool lazy = f->mode == CEPH_FILE_MODE_LAZY; - - ldout(cct, 10) << "cur file size is " << in->size << dendl; - - // time it. - utime_t start = ceph_clock_now(); - - if (in->inline_version == 0) { - int r = _getattr(in, CEPH_STAT_CAP_INLINE_DATA, f->actor_perms, true); - if (r < 0) - return r; - assert(in->inline_version > 0); - } - - // copy into fresh buffer (since our write may be resub, async) - bufferlist bl; - if (buf) { - if (size > 0) - bl.append(buf, size); - } else if (iov){ - for (int i = 0; i < iovcnt; i++) { - if (iov[i].iov_len > 0) { - bl.append((const char *)iov[i].iov_base, iov[i].iov_len); - } - } - } - - utime_t lat; - uint64_t totalwritten; - int have; - int r = get_caps(in, CEPH_CAP_FILE_WR|CEPH_CAP_AUTH_SHARED, - CEPH_CAP_FILE_BUFFER, &have, endoff); - if (r < 0) - return r; - - /* clear the setuid/setgid bits, if any */ - if (unlikely(in->mode & (S_ISUID|S_ISGID)) && size > 0) { - struct ceph_statx stx = { 0 }; - - put_cap_ref(in, CEPH_CAP_AUTH_SHARED); - r = __setattrx(in, &stx, CEPH_SETATTR_KILL_SGUID, f->actor_perms); - if (r < 0) - return r; - } else { - put_cap_ref(in, CEPH_CAP_AUTH_SHARED); - } - - if (f->flags & O_DIRECT) - have &= ~CEPH_CAP_FILE_BUFFER; - - ldout(cct, 10) << " snaprealm " << *in->snaprealm << dendl; - - Mutex uninline_flock("Client::_write_uninline_data flock"); - Cond uninline_cond; - bool uninline_done = false; - int uninline_ret = 0; - Context *onuninline = NULL; - - if (in->inline_version < CEPH_INLINE_NONE) { - if (endoff > cct->_conf->client_max_inline_size || - endoff > CEPH_INLINE_MAX_SIZE || - !(have & CEPH_CAP_FILE_BUFFER)) { - onuninline = new C_SafeCond(&uninline_flock, - &uninline_cond, - &uninline_done, - &uninline_ret); - uninline_data(in, onuninline); - } else { - get_cap_ref(in, CEPH_CAP_FILE_BUFFER); - - uint32_t len = in->inline_data.length(); - - if (endoff < len) - in->inline_data.copy(endoff, len - endoff, bl); - - if (offset < len) - in->inline_data.splice(offset, len - offset); - else if (offset > len) - in->inline_data.append_zero(offset - len); - - in->inline_data.append(bl); - in->inline_version++; - - put_cap_ref(in, CEPH_CAP_FILE_BUFFER); - - goto success; - } - } - - if (cct->_conf->client_oc && (have & CEPH_CAP_FILE_BUFFER)) { - // do buffered write - if (!in->oset.dirty_or_tx) - get_cap_ref(in, CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_BUFFER); - - get_cap_ref(in, CEPH_CAP_FILE_BUFFER); - - // async, caching, non-blocking. - r = objectcacher->file_write(&in->oset, &in->layout, - in->snaprealm->get_snap_context(), - offset, size, bl, ceph::real_clock::now(), - 0); - put_cap_ref(in, CEPH_CAP_FILE_BUFFER); - - if (r < 0) - goto done; - - // flush cached write if O_SYNC is set on file fh - // O_DSYNC == O_SYNC on linux < 2.6.33 - // O_SYNC = __O_SYNC | O_DSYNC on linux >= 2.6.33 - if ((f->flags & O_SYNC) || (f->flags & O_DSYNC)) { - _flush_range(in, offset, size); - } - } else { - if (f->flags & O_DIRECT) - _flush_range(in, offset, size); - - // simple, non-atomic sync write - Mutex flock("Client::_write flock"); - Cond cond; - bool done = false; - Context *onfinish = new C_SafeCond(&flock, &cond, &done); - - unsafe_sync_write++; - get_cap_ref(in, CEPH_CAP_FILE_BUFFER); // released by onsafe callback - - filer->write_trunc(in->ino, &in->layout, in->snaprealm->get_snap_context(), - offset, size, bl, ceph::real_clock::now(), 0, - in->truncate_size, in->truncate_seq, - onfinish); - client_lock.Unlock(); - flock.Lock(); - - while (!done) - cond.Wait(flock); - flock.Unlock(); - client_lock.Lock(); - _sync_write_commit(in); - } - - // if we get here, write was successful, update client metadata -success: - // time - lat = ceph_clock_now(); - lat -= start; - logger->tinc(l_c_wrlat, lat); - - totalwritten = size; - r = (int)totalwritten; - - // extend file? - if (totalwritten + offset > in->size) { - in->size = totalwritten + offset; - mark_caps_dirty(in, CEPH_CAP_FILE_WR); - - if (is_quota_bytes_approaching(in, f->actor_perms)) { - check_caps(in, CHECK_CAPS_NODELAY); - } else if (is_max_size_approaching(in)) { - check_caps(in, 0); - } - - ldout(cct, 7) << "wrote to " << totalwritten+offset << ", extending file size" << dendl; - } else { - ldout(cct, 7) << "wrote to " << totalwritten+offset << ", leaving file size at " << in->size << dendl; - } - - // mtime - in->mtime = ceph_clock_now(); - in->change_attr++; - mark_caps_dirty(in, CEPH_CAP_FILE_WR); - -done: - - if (onuninline) { - client_lock.Unlock(); - uninline_flock.Lock(); - while (!uninline_done) - uninline_cond.Wait(uninline_flock); - uninline_flock.Unlock(); - client_lock.Lock(); - - if (uninline_ret >= 0 || uninline_ret == -ECANCELED) { - in->inline_data.clear(); - in->inline_version = CEPH_INLINE_NONE; - mark_caps_dirty(in, CEPH_CAP_FILE_WR); - check_caps(in, 0); - } else - r = uninline_ret; - } - - put_cap_ref(in, CEPH_CAP_FILE_WR); - return r; -} - -int Client::_flush(Fh *f) -{ - Inode *in = f->inode.get(); - int err = f->take_async_err(); - if (err != 0) { - ldout(cct, 1) << __func__ << ": " << f << " on inode " << *in << " caught async_err = " - << cpp_strerror(err) << dendl; - } else { - ldout(cct, 10) << __func__ << ": " << f << " on inode " << *in << " no async_err state" << dendl; - } - - return err; -} - -int Client::truncate(const char *relpath, loff_t length, const UserPerm& perms) -{ - struct ceph_statx stx; - stx.stx_size = length; - return setattrx(relpath, &stx, CEPH_SETATTR_SIZE, perms); -} - -int Client::ftruncate(int fd, loff_t length, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "ftruncate" << std::endl; - tout(cct) << fd << std::endl; - tout(cct) << length << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; -#if defined(__linux__) && defined(O_PATH) - if (f->flags & O_PATH) - return -EBADF; -#endif - struct stat attr; - attr.st_size = length; - return _setattr(f->inode, &attr, CEPH_SETATTR_SIZE, perms); -} - -int Client::fsync(int fd, bool syncdataonly) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "fsync" << std::endl; - tout(cct) << fd << std::endl; - tout(cct) << syncdataonly << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; -#if defined(__linux__) && defined(O_PATH) - if (f->flags & O_PATH) - return -EBADF; -#endif - int r = _fsync(f, syncdataonly); - if (r == 0) { - // The IOs in this fsync were okay, but maybe something happened - // in the background that we shoudl be reporting? - r = f->take_async_err(); - ldout(cct, 3) << "fsync(" << fd << ", " << syncdataonly - << ") = 0, async_err = " << r << dendl; - } else { - // Assume that an error we encountered during fsync, even reported - // synchronously, would also have applied the error to the Fh, and we - // should clear it here to avoid returning the same error again on next - // call. - ldout(cct, 3) << "fsync(" << fd << ", " << syncdataonly << ") = " - << r << dendl; - f->take_async_err(); - } - return r; -} - -int Client::_fsync(Inode *in, bool syncdataonly) -{ - int r = 0; - Mutex lock("Client::_fsync::lock"); - Cond cond; - bool done = false; - C_SafeCond *object_cacher_completion = NULL; - ceph_tid_t flush_tid = 0; - InodeRef tmp_ref; - - ldout(cct, 3) << "_fsync on " << *in << " " << (syncdataonly ? "(dataonly)":"(data+metadata)") << dendl; - - if (cct->_conf->client_oc) { - object_cacher_completion = new C_SafeCond(&lock, &cond, &done, &r); - tmp_ref = in; // take a reference; C_SafeCond doesn't and _flush won't either - _flush(in, object_cacher_completion); - ldout(cct, 15) << "using return-valued form of _fsync" << dendl; - } - - if (!syncdataonly && in->dirty_caps) { - check_caps(in, CHECK_CAPS_NODELAY|CHECK_CAPS_SYNCHRONOUS); - if (in->flushing_caps) - flush_tid = last_flush_tid; - } else ldout(cct, 10) << "no metadata needs to commit" << dendl; - - if (!syncdataonly && !in->unsafe_ops.empty()) { - MetaRequest *req = in->unsafe_ops.back(); - ldout(cct, 15) << "waiting on unsafe requests, last tid " << req->get_tid() << dendl; - - req->get(); - wait_on_list(req->waitfor_safe); - put_request(req); - } - - if (object_cacher_completion) { // wait on a real reply instead of guessing - client_lock.Unlock(); - lock.Lock(); - ldout(cct, 15) << "waiting on data to flush" << dendl; - while (!done) - cond.Wait(lock); - lock.Unlock(); - client_lock.Lock(); - ldout(cct, 15) << "got " << r << " from flush writeback" << dendl; - } else { - // FIXME: this can starve - while (in->cap_refs[CEPH_CAP_FILE_BUFFER] > 0) { - ldout(cct, 10) << "ino " << in->ino << " has " << in->cap_refs[CEPH_CAP_FILE_BUFFER] - << " uncommitted, waiting" << dendl; - wait_on_list(in->waitfor_commit); - } - } - - if (!r) { - if (flush_tid > 0) - wait_sync_caps(in, flush_tid); - - ldout(cct, 10) << "ino " << in->ino << " has no uncommitted writes" << dendl; - } else { - ldout(cct, 1) << "ino " << in->ino << " failed to commit to disk! " - << cpp_strerror(-r) << dendl; - } - - return r; -} - -int Client::_fsync(Fh *f, bool syncdataonly) -{ - ldout(cct, 3) << "_fsync(" << f << ", " << (syncdataonly ? "dataonly)":"data+metadata)") << dendl; - return _fsync(f->inode.get(), syncdataonly); -} - -int Client::fstat(int fd, struct stat *stbuf, const UserPerm& perms, int mask) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "fstat mask " << hex << mask << dec << std::endl; - tout(cct) << fd << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - int r = _getattr(f->inode, mask, perms); - if (r < 0) - return r; - fill_stat(f->inode, stbuf, NULL); - ldout(cct, 3) << "fstat(" << fd << ", " << stbuf << ") = " << r << dendl; - return r; -} - -int Client::fstatx(int fd, struct ceph_statx *stx, const UserPerm& perms, - unsigned int want, unsigned int flags) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "fstatx flags " << hex << flags << " want " << want << dec << std::endl; - tout(cct) << fd << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - - unsigned mask = statx_to_mask(flags, want); - - int r = 0; - if (mask && !f->inode->caps_issued_mask(mask)) { - r = _getattr(f->inode, mask, perms); - if (r < 0) { - ldout(cct, 3) << "fstatx exit on error!" << dendl; - return r; - } - } - - fill_statx(f->inode, mask, stx); - ldout(cct, 3) << "fstatx(" << fd << ", " << stx << ") = " << r << dendl; - return r; -} - -// not written yet, but i want to link! - -int Client::chdir(const char *relpath, std::string &new_cwd, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "chdir" << std::endl; - tout(cct) << relpath << std::endl; - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - int r = path_walk(path, &in, perms); - if (r < 0) - return r; - if (cwd != in) - cwd.swap(in); - ldout(cct, 3) << "chdir(" << relpath << ") cwd now " << cwd->ino << dendl; - - _getcwd(new_cwd, perms); - return 0; -} - -void Client::_getcwd(string& dir, const UserPerm& perms) -{ - filepath path; - ldout(cct, 10) << "getcwd " << *cwd << dendl; - - Inode *in = cwd.get(); - while (in != root) { - assert(in->dn_set.size() < 2); // dirs can't be hard-linked - - // A cwd or ancester is unlinked - if (in->dn_set.empty()) { - return; - } - - Dentry *dn = in->get_first_parent(); - - - if (!dn) { - // look it up - ldout(cct, 10) << "getcwd looking up parent for " << *in << dendl; - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPNAME); - filepath path(in->ino); - req->set_filepath(path); - req->set_inode(in); - int res = make_request(req, perms); - if (res < 0) - break; - - // start over - path = filepath(); - in = cwd.get(); - continue; - } - path.push_front_dentry(dn->name); - in = dn->dir->parent_inode; - } - dir = "/"; - dir += path.get_path(); -} - -void Client::getcwd(string& dir, const UserPerm& perms) -{ - Mutex::Locker l(client_lock); - if (!unmounting) - _getcwd(dir, perms); -} - -int Client::statfs(const char *path, struct statvfs *stbuf, - const UserPerm& perms) -{ - Mutex::Locker l(client_lock); - tout(cct) << "statfs" << std::endl; - - if (unmounting) - return -ENOTCONN; - - ceph_statfs stats; - C_SaferCond cond; - - const vector &data_pools = mdsmap->get_data_pools(); - if (data_pools.size() == 1) { - objecter->get_fs_stats(stats, data_pools[0], &cond); - } else { - objecter->get_fs_stats(stats, boost::optional(), &cond); - } - - client_lock.Unlock(); - int rval = cond.wait(); - client_lock.Lock(); - - if (rval < 0) { - ldout(cct, 1) << "underlying call to statfs returned error: " - << cpp_strerror(rval) - << dendl; - return rval; - } - - memset(stbuf, 0, sizeof(*stbuf)); - - /* - * we're going to set a block size of 4MB so we can represent larger - * FSes without overflowing. Additionally convert the space - * measurements from KB to bytes while making them in terms of - * blocks. We use 4MB only because it is big enough, and because it - * actually *is* the (ceph) default block size. - */ - const int CEPH_BLOCK_SHIFT = 22; - stbuf->f_frsize = 1 << CEPH_BLOCK_SHIFT; - stbuf->f_bsize = 1 << CEPH_BLOCK_SHIFT; - stbuf->f_files = stats.num_objects; - stbuf->f_ffree = -1; - stbuf->f_favail = -1; - stbuf->f_fsid = -1; // ?? - stbuf->f_flag = 0; // ?? - stbuf->f_namemax = NAME_MAX; - - // Usually quota_root will == root_ancestor, but if the mount root has no - // quota but we can see a parent of it that does have a quota, we'll - // respect that one instead. - assert(root != nullptr); - Inode *quota_root = root->quota.is_enable() ? root : get_quota_root(root, perms); - - // get_quota_root should always give us something - // because client quotas are always enabled - assert(quota_root != nullptr); - - if (quota_root && cct->_conf->client_quota_df && quota_root->quota.max_bytes) { - - // Skip the getattr if any sessions are stale, as we don't want to - // block `df` if this client has e.g. been evicted, or if the MDS cluster - // is unhealthy. - if (!_any_stale_sessions()) { - int r = _getattr(quota_root, 0, perms, true); - if (r != 0) { - // Ignore return value: error getting latest inode metadata is not a good - // reason to break "df". - lderr(cct) << "Error in getattr on quota root 0x" - << std::hex << quota_root->ino << std::dec - << " statfs result may be outdated" << dendl; - } - } - - // Special case: if there is a size quota set on the Inode acting - // as the root for this client mount, then report the quota status - // as the filesystem statistics. - const fsblkcnt_t total = quota_root->quota.max_bytes >> CEPH_BLOCK_SHIFT; - const fsblkcnt_t used = quota_root->rstat.rbytes >> CEPH_BLOCK_SHIFT; - // It is possible for a quota to be exceeded: arithmetic here must - // handle case where used > total. - const fsblkcnt_t free = total > used ? total - used : 0; - - stbuf->f_blocks = total; - stbuf->f_bfree = free; - stbuf->f_bavail = free; - } else { - // General case: report the cluster statistics returned from RADOS. Because - // multiple pools may be used without one filesystem namespace via - // layouts, this is the most correct thing we can do. - stbuf->f_blocks = stats.kb >> (CEPH_BLOCK_SHIFT - 10); - stbuf->f_bfree = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10); - stbuf->f_bavail = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10); - } - - return rval; -} - -int Client::_do_filelock(Inode *in, Fh *fh, int lock_type, int op, int sleep, - struct flock *fl, uint64_t owner, bool removing) -{ - ldout(cct, 10) << "_do_filelock ino " << in->ino - << (lock_type == CEPH_LOCK_FCNTL ? " fcntl" : " flock") - << " type " << fl->l_type << " owner " << owner - << " " << fl->l_start << "~" << fl->l_len << dendl; - - int lock_cmd; - if (F_RDLCK == fl->l_type) - lock_cmd = CEPH_LOCK_SHARED; - else if (F_WRLCK == fl->l_type) - lock_cmd = CEPH_LOCK_EXCL; - else if (F_UNLCK == fl->l_type) - lock_cmd = CEPH_LOCK_UNLOCK; - else - return -EIO; - - if (op != CEPH_MDS_OP_SETFILELOCK || lock_cmd == CEPH_LOCK_UNLOCK) - sleep = 0; - - /* - * Set the most significant bit, so that MDS knows the 'owner' - * is sufficient to identify the owner of lock. (old code uses - * both 'owner' and 'pid') - */ - owner |= (1ULL << 63); - - MetaRequest *req = new MetaRequest(op); - filepath path; - in->make_nosnap_relative_path(path); - req->set_filepath(path); - req->set_inode(in); - - req->head.args.filelock_change.rule = lock_type; - req->head.args.filelock_change.type = lock_cmd; - req->head.args.filelock_change.owner = owner; - req->head.args.filelock_change.pid = fl->l_pid; - req->head.args.filelock_change.start = fl->l_start; - req->head.args.filelock_change.length = fl->l_len; - req->head.args.filelock_change.wait = sleep; - - int ret; - bufferlist bl; - - if (sleep && switch_interrupt_cb) { - // enable interrupt - switch_interrupt_cb(callback_handle, req->get()); - ret = make_request(req, fh->actor_perms, NULL, NULL, -1, &bl); - // disable interrupt - switch_interrupt_cb(callback_handle, NULL); - if (ret == 0 && req->aborted()) { - // effect of this lock request has been revoked by the 'lock intr' request - ret = req->get_abort_code(); - } - put_request(req); - } else { - ret = make_request(req, fh->actor_perms, NULL, NULL, -1, &bl); - } - - if (ret == 0) { - if (op == CEPH_MDS_OP_GETFILELOCK) { - ceph_filelock filelock; - bufferlist::iterator p = bl.begin(); - ::decode(filelock, p); - - if (CEPH_LOCK_SHARED == filelock.type) - fl->l_type = F_RDLCK; - else if (CEPH_LOCK_EXCL == filelock.type) - fl->l_type = F_WRLCK; - else - fl->l_type = F_UNLCK; - - fl->l_whence = SEEK_SET; - fl->l_start = filelock.start; - fl->l_len = filelock.length; - fl->l_pid = filelock.pid; - } else if (op == CEPH_MDS_OP_SETFILELOCK) { - ceph_lock_state_t *lock_state; - if (lock_type == CEPH_LOCK_FCNTL) { - if (!in->fcntl_locks) - in->fcntl_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FCNTL); - lock_state = in->fcntl_locks; - } else if (lock_type == CEPH_LOCK_FLOCK) { - if (!in->flock_locks) - in->flock_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FLOCK); - lock_state = in->flock_locks; - } else { - ceph_abort(); - return -EINVAL; - } - _update_lock_state(fl, owner, lock_state); - - if (!removing) { - if (lock_type == CEPH_LOCK_FCNTL) { - if (!fh->fcntl_locks) - fh->fcntl_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FCNTL); - lock_state = fh->fcntl_locks; - } else { - if (!fh->flock_locks) - fh->flock_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FLOCK); - lock_state = fh->flock_locks; - } - _update_lock_state(fl, owner, lock_state); - } - } else - ceph_abort(); - } - return ret; -} - -int Client::_interrupt_filelock(MetaRequest *req) -{ - // Set abort code, but do not kick. The abort code prevents the request - // from being re-sent. - req->abort(-EINTR); - if (req->mds < 0) - return 0; // haven't sent the request - - Inode *in = req->inode(); - - int lock_type; - if (req->head.args.filelock_change.rule == CEPH_LOCK_FLOCK) - lock_type = CEPH_LOCK_FLOCK_INTR; - else if (req->head.args.filelock_change.rule == CEPH_LOCK_FCNTL) - lock_type = CEPH_LOCK_FCNTL_INTR; - else { - ceph_abort(); - return -EINVAL; - } - - MetaRequest *intr_req = new MetaRequest(CEPH_MDS_OP_SETFILELOCK); - filepath path; - in->make_nosnap_relative_path(path); - intr_req->set_filepath(path); - intr_req->set_inode(in); - intr_req->head.args.filelock_change = req->head.args.filelock_change; - intr_req->head.args.filelock_change.rule = lock_type; - intr_req->head.args.filelock_change.type = CEPH_LOCK_UNLOCK; - - UserPerm perms(req->get_uid(), req->get_gid()); - return make_request(intr_req, perms, NULL, NULL, -1); -} - -void Client::_encode_filelocks(Inode *in, bufferlist& bl) -{ - if (!in->fcntl_locks && !in->flock_locks) - return; - - unsigned nr_fcntl_locks = in->fcntl_locks ? in->fcntl_locks->held_locks.size() : 0; - ::encode(nr_fcntl_locks, bl); - if (nr_fcntl_locks) { - ceph_lock_state_t* lock_state = in->fcntl_locks; - for(multimap::iterator p = lock_state->held_locks.begin(); - p != lock_state->held_locks.end(); - ++p) - ::encode(p->second, bl); - } - - unsigned nr_flock_locks = in->flock_locks ? in->flock_locks->held_locks.size() : 0; - ::encode(nr_flock_locks, bl); - if (nr_flock_locks) { - ceph_lock_state_t* lock_state = in->flock_locks; - for(multimap::iterator p = lock_state->held_locks.begin(); - p != lock_state->held_locks.end(); - ++p) - ::encode(p->second, bl); - } - - ldout(cct, 10) << "_encode_filelocks ino " << in->ino << ", " << nr_fcntl_locks - << " fcntl locks, " << nr_flock_locks << " flock locks" << dendl; -} - -void Client::_release_filelocks(Fh *fh) -{ - if (!fh->fcntl_locks && !fh->flock_locks) - return; - - Inode *in = fh->inode.get(); - ldout(cct, 10) << "_release_filelocks " << fh << " ino " << in->ino << dendl; - - list > to_release; - - if (fh->fcntl_locks) { - ceph_lock_state_t* lock_state = fh->fcntl_locks; - for(multimap::iterator p = lock_state->held_locks.begin(); - p != lock_state->held_locks.end(); - ++p) - to_release.push_back(pair(CEPH_LOCK_FCNTL, p->second)); - delete fh->fcntl_locks; - } - if (fh->flock_locks) { - ceph_lock_state_t* lock_state = fh->flock_locks; - for(multimap::iterator p = lock_state->held_locks.begin(); - p != lock_state->held_locks.end(); - ++p) - to_release.push_back(pair(CEPH_LOCK_FLOCK, p->second)); - delete fh->flock_locks; - } - - if (to_release.empty()) - return; - - struct flock fl; - memset(&fl, 0, sizeof(fl)); - fl.l_whence = SEEK_SET; - fl.l_type = F_UNLCK; - - for (list >::iterator p = to_release.begin(); - p != to_release.end(); - ++p) { - fl.l_start = p->second.start; - fl.l_len = p->second.length; - fl.l_pid = p->second.pid; - _do_filelock(in, fh, p->first, CEPH_MDS_OP_SETFILELOCK, 0, &fl, - p->second.owner, true); - } -} - -void Client::_update_lock_state(struct flock *fl, uint64_t owner, - ceph_lock_state_t *lock_state) -{ - int lock_cmd; - if (F_RDLCK == fl->l_type) - lock_cmd = CEPH_LOCK_SHARED; - else if (F_WRLCK == fl->l_type) - lock_cmd = CEPH_LOCK_EXCL; - else - lock_cmd = CEPH_LOCK_UNLOCK;; - - ceph_filelock filelock; - filelock.start = fl->l_start; - filelock.length = fl->l_len; - filelock.client = 0; - // see comment in _do_filelock() - filelock.owner = owner | (1ULL << 63); - filelock.pid = fl->l_pid; - filelock.type = lock_cmd; - - if (filelock.type == CEPH_LOCK_UNLOCK) { - list activated_locks; - lock_state->remove_lock(filelock, activated_locks); - } else { - bool r = lock_state->add_lock(filelock, false, false, NULL); - assert(r); - } -} - -int Client::_getlk(Fh *fh, struct flock *fl, uint64_t owner) -{ - Inode *in = fh->inode.get(); - ldout(cct, 10) << "_getlk " << fh << " ino " << in->ino << dendl; - int ret = _do_filelock(in, fh, CEPH_LOCK_FCNTL, CEPH_MDS_OP_GETFILELOCK, 0, fl, owner); - return ret; -} - -int Client::_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep) -{ - Inode *in = fh->inode.get(); - ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << dendl; - int ret = _do_filelock(in, fh, CEPH_LOCK_FCNTL, CEPH_MDS_OP_SETFILELOCK, sleep, fl, owner); - ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << " result=" << ret << dendl; - return ret; -} - -int Client::_flock(Fh *fh, int cmd, uint64_t owner) -{ - Inode *in = fh->inode.get(); - ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << dendl; - - int sleep = !(cmd & LOCK_NB); - cmd &= ~LOCK_NB; - - int type; - switch (cmd) { - case LOCK_SH: - type = F_RDLCK; - break; - case LOCK_EX: - type = F_WRLCK; - break; - case LOCK_UN: - type = F_UNLCK; - break; - default: - return -EINVAL; - } - - struct flock fl; - memset(&fl, 0, sizeof(fl)); - fl.l_type = type; - fl.l_whence = SEEK_SET; - - int ret = _do_filelock(in, fh, CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, sleep, &fl, owner); - ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << " result=" << ret << dendl; - return ret; -} - -int Client::ll_statfs(Inode *in, struct statvfs *stbuf, const UserPerm& perms) -{ - /* Since the only thing this does is wrap a call to statfs, and - statfs takes a lock, it doesn't seem we have a need to split it - out. */ - return statfs(0, stbuf, perms); -} - -void Client::ll_register_callbacks(struct client_callback_args *args) -{ - if (!args) - return; - Mutex::Locker l(client_lock); - ldout(cct, 10) << "ll_register_callbacks cb " << args->handle - << " invalidate_ino_cb " << args->ino_cb - << " invalidate_dentry_cb " << args->dentry_cb - << " getgroups_cb" << args->getgroups_cb - << " switch_interrupt_cb " << args->switch_intr_cb - << " remount_cb " << args->remount_cb - << dendl; - callback_handle = args->handle; - if (args->ino_cb) { - ino_invalidate_cb = args->ino_cb; - async_ino_invalidator.start(); - } - if (args->dentry_cb) { - dentry_invalidate_cb = args->dentry_cb; - async_dentry_invalidator.start(); - } - if (args->switch_intr_cb) { - switch_interrupt_cb = args->switch_intr_cb; - interrupt_finisher.start(); - } - if (args->remount_cb) { - remount_cb = args->remount_cb; - remount_finisher.start(); - } - getgroups_cb = args->getgroups_cb; - umask_cb = args->umask_cb; -} - -int Client::test_dentry_handling(bool can_invalidate) -{ - int r = 0; - - can_invalidate_dentries = can_invalidate; - - if (can_invalidate_dentries) { - assert(dentry_invalidate_cb); - ldout(cct, 1) << "using dentry_invalidate_cb" << dendl; - } else if (remount_cb) { - ldout(cct, 1) << "using remount_cb" << dendl; - int s = remount_cb(callback_handle); - if (s) { - lderr(cct) << "Failed to invoke remount, needed to ensure kernel dcache consistency" - << dendl; - } - if (cct->_conf->client_die_on_failed_remount) { - require_remount = true; - r = s; - } - } else { - lderr(cct) << "no method to invalidate kernel dentry cache; expect issues!" << dendl; - if (cct->_conf->client_die_on_failed_remount) - ceph_abort(); - } - return r; -} - -int Client::_sync_fs() -{ - ldout(cct, 10) << "_sync_fs" << dendl; - - // flush file data - Mutex lock("Client::_fsync::lock"); - Cond cond; - bool flush_done = false; - if (cct->_conf->client_oc) - objectcacher->flush_all(new C_SafeCond(&lock, &cond, &flush_done)); - else - flush_done = true; - - // flush caps - flush_caps_sync(); - ceph_tid_t flush_tid = last_flush_tid; - - // wait for unsafe mds requests - wait_unsafe_requests(); - - wait_sync_caps(flush_tid); - - if (!flush_done) { - client_lock.Unlock(); - lock.Lock(); - ldout(cct, 15) << "waiting on data to flush" << dendl; - while (!flush_done) - cond.Wait(lock); - lock.Unlock(); - client_lock.Lock(); - } - - return 0; -} - -int Client::sync_fs() -{ - Mutex::Locker l(client_lock); - - if (unmounting) - return -ENOTCONN; - - return _sync_fs(); -} - -int64_t Client::drop_caches() -{ - Mutex::Locker l(client_lock); - return objectcacher->release_all(); -} - - -int Client::lazyio_propogate(int fd, loff_t offset, size_t count) -{ - Mutex::Locker l(client_lock); - ldout(cct, 3) << "op: client->lazyio_propogate(" << fd - << ", " << offset << ", " << count << ")" << dendl; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - - // for now - _fsync(f, true); - - return 0; -} - -int Client::lazyio_synchronize(int fd, loff_t offset, size_t count) -{ - Mutex::Locker l(client_lock); - ldout(cct, 3) << "op: client->lazyio_synchronize(" << fd - << ", " << offset << ", " << count << ")" << dendl; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - Inode *in = f->inode.get(); - - _fsync(f, true); - if (_release(in)) - check_caps(in, 0); - return 0; -} - - -// ============================= -// snaps - -int Client::mksnap(const char *relpath, const char *name, const UserPerm& perm) -{ - Mutex::Locker l(client_lock); - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - int r = path_walk(path, &in, perm); - if (r < 0) - return r; - if (cct->_conf->client_permissions) { - r = may_create(in.get(), perm); - if (r < 0) - return r; - } - Inode *snapdir = open_snapdir(in.get()); - return _mkdir(snapdir, name, 0, perm); -} - -int Client::rmsnap(const char *relpath, const char *name, const UserPerm& perms) -{ - Mutex::Locker l(client_lock); - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - int r = path_walk(path, &in, perms); - if (r < 0) - return r; - if (cct->_conf->client_permissions) { - r = may_delete(in.get(), NULL, perms); - if (r < 0) - return r; - } - Inode *snapdir = open_snapdir(in.get()); - return _rmdir(snapdir, name, perms); -} - -// ============================= -// expose caps - -int Client::get_caps_issued(int fd) { - - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - - return f->inode->caps_issued(); -} - -int Client::get_caps_issued(const char *path, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - filepath p(path); - InodeRef in; - int r = path_walk(p, &in, perms, true); - if (r < 0) - return r; - return in->caps_issued(); -} - -// ========================================= -// low level - -Inode *Client::open_snapdir(Inode *diri) -{ - Inode *in; - vinodeno_t vino(diri->ino, CEPH_SNAPDIR); - if (!inode_map.count(vino)) { - in = new Inode(this, vino, &diri->layout); - - in->ino = diri->ino; - in->snapid = CEPH_SNAPDIR; - in->mode = diri->mode; - in->uid = diri->uid; - in->gid = diri->gid; - in->mtime = diri->mtime; - in->ctime = diri->ctime; - in->btime = diri->btime; - in->size = diri->size; - in->change_attr = diri->change_attr; - - in->dirfragtree.clear(); - in->snapdir_parent = diri; - diri->flags |= I_SNAPDIR_OPEN; - inode_map[vino] = in; - if (use_faked_inos()) - _assign_faked_ino(in); - ldout(cct, 10) << "open_snapdir created snapshot inode " << *in << dendl; - } else { - in = inode_map[vino]; - ldout(cct, 10) << "open_snapdir had snapshot inode " << *in << dendl; - } - return in; -} - -int Client::ll_lookup(Inode *parent, const char *name, struct stat *attr, - Inode **out, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - vinodeno_t vparent = _get_vino(parent); - ldout(cct, 3) << "ll_lookup " << vparent << " " << name << dendl; - tout(cct) << "ll_lookup" << std::endl; - tout(cct) << name << std::endl; - - if (unmounting) - return -ENOTCONN; - - int r = 0; - if (!cct->_conf->fuse_default_permissions) { - r = may_lookup(parent, perms); - if (r < 0) - return r; - } - - string dname(name); - InodeRef in; - - r = _lookup(parent, dname, CEPH_STAT_CAP_INODE_ALL, &in, perms); - if (r < 0) { - attr->st_ino = 0; - goto out; - } - - assert(in); - fill_stat(in, attr); - _ll_get(in.get()); - - out: - ldout(cct, 3) << "ll_lookup " << vparent << " " << name - << " -> " << r << " (" << hex << attr->st_ino << dec << ")" << dendl; - tout(cct) << attr->st_ino << std::endl; - *out = in.get(); - return r; -} - -int Client::ll_lookupx(Inode *parent, const char *name, Inode **out, - struct ceph_statx *stx, unsigned want, unsigned flags, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - vinodeno_t vparent = _get_vino(parent); - ldout(cct, 3) << "ll_lookupx " << vparent << " " << name << dendl; - tout(cct) << "ll_lookupx" << std::endl; - tout(cct) << name << std::endl; - - if (unmounting) - return -ENOTCONN; - - int r = 0; - if (!cct->_conf->fuse_default_permissions) { - r = may_lookup(parent, perms); - if (r < 0) - return r; - } - - string dname(name); - InodeRef in; - - unsigned mask = statx_to_mask(flags, want); - r = _lookup(parent, dname, mask, &in, perms); - if (r < 0) { - stx->stx_ino = 0; - stx->stx_mask = 0; - } else { - assert(in); - fill_statx(in, mask, stx); - _ll_get(in.get()); - } - - ldout(cct, 3) << "ll_lookupx " << vparent << " " << name - << " -> " << r << " (" << hex << stx->stx_ino << dec << ")" << dendl; - tout(cct) << stx->stx_ino << std::endl; - *out = in.get(); - return r; -} - -int Client::ll_walk(const char* name, Inode **out, struct ceph_statx *stx, - unsigned int want, unsigned int flags, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - filepath fp(name, 0); - InodeRef in; - int rc; - unsigned mask = statx_to_mask(flags, want); - - ldout(cct, 3) << "ll_walk" << name << dendl; - tout(cct) << "ll_walk" << std::endl; - tout(cct) << name << std::endl; - - rc = path_walk(fp, &in, perms, !(flags & AT_SYMLINK_NOFOLLOW), mask); - if (rc < 0) { - /* zero out mask, just in case... */ - stx->stx_mask = 0; - stx->stx_ino = 0; - *out = NULL; - return rc; - } else { - assert(in); - fill_statx(in, mask, stx); - _ll_get(in.get()); - *out = in.get(); - return 0; - } -} - -void Client::_ll_get(Inode *in) -{ - if (in->ll_ref == 0) { - in->get(); - if (in->is_dir() && !in->dn_set.empty()) { - assert(in->dn_set.size() == 1); // dirs can't be hard-linked - in->get_first_parent()->get(); // pin dentry - } - } - in->ll_get(); - ldout(cct, 20) << "_ll_get " << in << " " << in->ino << " -> " << in->ll_ref << dendl; -} - -int Client::_ll_put(Inode *in, int num) -{ - in->ll_put(num); - ldout(cct, 20) << "_ll_put " << in << " " << in->ino << " " << num << " -> " << in->ll_ref << dendl; - if (in->ll_ref == 0) { - if (in->is_dir() && !in->dn_set.empty()) { - assert(in->dn_set.size() == 1); // dirs can't be hard-linked - in->get_first_parent()->put(); // unpin dentry - } - put_inode(in); - return 0; - } else { - return in->ll_ref; - } -} - -void Client::_ll_drop_pins() -{ - ldout(cct, 10) << "_ll_drop_pins" << dendl; - ceph::unordered_map::iterator next; - for (ceph::unordered_map::iterator it = inode_map.begin(); - it != inode_map.end(); - it = next) { - Inode *in = it->second; - next = it; - ++next; - if (in->ll_ref) - _ll_put(in, in->ll_ref); - } -} - -bool Client::ll_forget(Inode *in, int count) -{ - Mutex::Locker lock(client_lock); - inodeno_t ino = _get_inodeno(in); - - ldout(cct, 3) << "ll_forget " << ino << " " << count << dendl; - tout(cct) << "ll_forget" << std::endl; - tout(cct) << ino.val << std::endl; - tout(cct) << count << std::endl; - - // Ignore forget if we're no longer mounted - if (unmounting) - return true; - - if (ino == 1) return true; // ignore forget on root. - - bool last = false; - if (in->ll_ref < count) { - ldout(cct, 1) << "WARNING: ll_forget on " << ino << " " << count - << ", which only has ll_ref=" << in->ll_ref << dendl; - _ll_put(in, in->ll_ref); - last = true; - } else { - if (_ll_put(in, count) == 0) - last = true; - } - - return last; -} - -bool Client::ll_put(Inode *in) -{ - /* ll_forget already takes the lock */ - return ll_forget(in, 1); -} - -snapid_t Client::ll_get_snapid(Inode *in) -{ - Mutex::Locker lock(client_lock); - return in->snapid; -} - -Inode *Client::ll_get_inode(ino_t ino) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return NULL; - - vinodeno_t vino = _map_faked_ino(ino); - unordered_map::iterator p = inode_map.find(vino); - if (p == inode_map.end()) - return NULL; - Inode *in = p->second; - _ll_get(in); - return in; -} - -Inode *Client::ll_get_inode(vinodeno_t vino) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return NULL; - - unordered_map::iterator p = inode_map.find(vino); - if (p == inode_map.end()) - return NULL; - Inode *in = p->second; - _ll_get(in); - return in; -} - -int Client::_ll_getattr(Inode *in, int caps, const UserPerm& perms) -{ - vinodeno_t vino = _get_vino(in); - - ldout(cct, 3) << "ll_getattr " << vino << dendl; - tout(cct) << "ll_getattr" << std::endl; - tout(cct) << vino.ino.val << std::endl; - - if (vino.snapid < CEPH_NOSNAP) - return 0; - else - return _getattr(in, caps, perms); -} - -int Client::ll_getattr(Inode *in, struct stat *attr, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - int res = _ll_getattr(in, CEPH_STAT_CAP_INODE_ALL, perms); - - if (res == 0) - fill_stat(in, attr); - ldout(cct, 3) << "ll_getattr " << _get_vino(in) << " = " << res << dendl; - return res; -} - -int Client::ll_getattrx(Inode *in, struct ceph_statx *stx, unsigned int want, - unsigned int flags, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - int res = 0; - unsigned mask = statx_to_mask(flags, want); - - if (mask && !in->caps_issued_mask(mask)) - res = _ll_getattr(in, mask, perms); - - if (res == 0) - fill_statx(in, mask, stx); - ldout(cct, 3) << "ll_getattrx " << _get_vino(in) << " = " << res << dendl; - return res; -} - -int Client::_ll_setattrx(Inode *in, struct ceph_statx *stx, int mask, - const UserPerm& perms, InodeRef *inp) -{ - vinodeno_t vino = _get_vino(in); - - ldout(cct, 3) << "ll_setattrx " << vino << " mask " << hex << mask << dec - << dendl; - tout(cct) << "ll_setattrx" << std::endl; - tout(cct) << vino.ino.val << std::endl; - tout(cct) << stx->stx_mode << std::endl; - tout(cct) << stx->stx_uid << std::endl; - tout(cct) << stx->stx_gid << std::endl; - tout(cct) << stx->stx_size << std::endl; - tout(cct) << stx->stx_mtime << std::endl; - tout(cct) << stx->stx_atime << std::endl; - tout(cct) << stx->stx_btime << std::endl; - tout(cct) << mask << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int res = may_setattr(in, stx, mask, perms); - if (res < 0) - return res; - } - - mask &= ~(CEPH_SETATTR_MTIME_NOW | CEPH_SETATTR_ATIME_NOW); - - return __setattrx(in, stx, mask, perms, inp); -} - -int Client::ll_setattrx(Inode *in, struct ceph_statx *stx, int mask, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - InodeRef target(in); - int res = _ll_setattrx(in, stx, mask, perms, &target); - if (res == 0) { - assert(in == target.get()); - fill_statx(in, in->caps_issued(), stx); - } - - ldout(cct, 3) << "ll_setattrx " << _get_vino(in) << " = " << res << dendl; - return res; -} - -int Client::ll_setattr(Inode *in, struct stat *attr, int mask, - const UserPerm& perms) -{ - struct ceph_statx stx; - stat_to_statx(attr, &stx); - - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - InodeRef target(in); - int res = _ll_setattrx(in, &stx, mask, perms, &target); - if (res == 0) { - assert(in == target.get()); - fill_stat(in, attr); - } - - ldout(cct, 3) << "ll_setattr " << _get_vino(in) << " = " << res << dendl; - return res; -} - - -// ---------- -// xattrs - -int Client::getxattr(const char *path, const char *name, void *value, size_t size, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - InodeRef in; - int r = Client::path_walk(path, &in, perms, true, CEPH_STAT_CAP_XATTR); - if (r < 0) - return r; - return _getxattr(in, name, value, size, perms); -} - -int Client::lgetxattr(const char *path, const char *name, void *value, size_t size, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - InodeRef in; - int r = Client::path_walk(path, &in, perms, false, CEPH_STAT_CAP_XATTR); - if (r < 0) - return r; - return _getxattr(in, name, value, size, perms); -} - -int Client::fgetxattr(int fd, const char *name, void *value, size_t size, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - return _getxattr(f->inode, name, value, size, perms); -} - -int Client::listxattr(const char *path, char *list, size_t size, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - InodeRef in; - int r = Client::path_walk(path, &in, perms, true, CEPH_STAT_CAP_XATTR); - if (r < 0) - return r; - return Client::_listxattr(in.get(), list, size, perms); -} - -int Client::llistxattr(const char *path, char *list, size_t size, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - InodeRef in; - int r = Client::path_walk(path, &in, perms, false, CEPH_STAT_CAP_XATTR); - if (r < 0) - return r; - return Client::_listxattr(in.get(), list, size, perms); -} - -int Client::flistxattr(int fd, char *list, size_t size, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - return Client::_listxattr(f->inode.get(), list, size, perms); -} - -int Client::removexattr(const char *path, const char *name, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - InodeRef in; - int r = Client::path_walk(path, &in, perms, true); - if (r < 0) - return r; - return _removexattr(in, name, perms); -} - -int Client::lremovexattr(const char *path, const char *name, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - InodeRef in; - int r = Client::path_walk(path, &in, perms, false); - if (r < 0) - return r; - return _removexattr(in, name, perms); -} - -int Client::fremovexattr(int fd, const char *name, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - return _removexattr(f->inode, name, perms); -} - -int Client::setxattr(const char *path, const char *name, const void *value, - size_t size, int flags, const UserPerm& perms) -{ - _setxattr_maybe_wait_for_osdmap(name, value, size); - - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - InodeRef in; - int r = Client::path_walk(path, &in, perms, true); - if (r < 0) - return r; - return _setxattr(in, name, value, size, flags, perms); -} - -int Client::lsetxattr(const char *path, const char *name, const void *value, - size_t size, int flags, const UserPerm& perms) -{ - _setxattr_maybe_wait_for_osdmap(name, value, size); - - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - InodeRef in; - int r = Client::path_walk(path, &in, perms, false); - if (r < 0) - return r; - return _setxattr(in, name, value, size, flags, perms); -} - -int Client::fsetxattr(int fd, const char *name, const void *value, size_t size, - int flags, const UserPerm& perms) -{ - _setxattr_maybe_wait_for_osdmap(name, value, size); - - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - return _setxattr(f->inode, name, value, size, flags, perms); -} - -int Client::_getxattr(Inode *in, const char *name, void *value, size_t size, - const UserPerm& perms) -{ - int r; - - const VXattr *vxattr = _match_vxattr(in, name); - if (vxattr) { - r = -ENODATA; - - // Do a force getattr to get the latest quota before returning - // a value to userspace. - r = _getattr(in, 0, perms, true); - if (r != 0) { - // Error from getattr! - return r; - } - - // call pointer-to-member function - char buf[256]; - if (!(vxattr->exists_cb && !(this->*(vxattr->exists_cb))(in))) { - r = (this->*(vxattr->getxattr_cb))(in, buf, sizeof(buf)); - } else { - r = -ENODATA; - } - - if (size != 0) { - if (r > (int)size) { - r = -ERANGE; - } else if (r > 0) { - memcpy(value, buf, r); - } - } - goto out; - } - - if (acl_type == NO_ACL && !strncmp(name, "system.", 7)) { - r = -EOPNOTSUPP; - goto out; - } - - r = _getattr(in, CEPH_STAT_CAP_XATTR, perms, in->xattr_version == 0); - if (r == 0) { - string n(name); - r = -ENODATA; - if (in->xattrs.count(n)) { - r = in->xattrs[n].length(); - if (r > 0 && size != 0) { - if (size >= (unsigned)r) - memcpy(value, in->xattrs[n].c_str(), r); - else - r = -ERANGE; - } - } - } - out: - ldout(cct, 3) << "_getxattr(" << in->ino << ", \"" << name << "\", " << size << ") = " << r << dendl; - return r; -} - -int Client::_getxattr(InodeRef &in, const char *name, void *value, size_t size, - const UserPerm& perms) -{ - if (cct->_conf->client_permissions) { - int r = xattr_permission(in.get(), name, MAY_READ, perms); - if (r < 0) - return r; - } - return _getxattr(in.get(), name, value, size, perms); -} - -int Client::ll_getxattr(Inode *in, const char *name, void *value, - size_t size, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vino = _get_vino(in); - - ldout(cct, 3) << "ll_getxattr " << vino << " " << name << " size " << size << dendl; - tout(cct) << "ll_getxattr" << std::endl; - tout(cct) << vino.ino.val << std::endl; - tout(cct) << name << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = xattr_permission(in, name, MAY_READ, perms); - if (r < 0) - return r; - } - - return _getxattr(in, name, value, size, perms); -} - -int Client::_listxattr(Inode *in, char *name, size_t size, - const UserPerm& perms) -{ - int r = _getattr(in, CEPH_STAT_CAP_XATTR, perms, in->xattr_version == 0); - if (r == 0) { - for (map::iterator p = in->xattrs.begin(); - p != in->xattrs.end(); - ++p) - r += p->first.length() + 1; - - const VXattr *vxattrs = _get_vxattrs(in); - r += _vxattrs_name_size(vxattrs); - - if (size != 0) { - if (size >= (unsigned)r) { - for (map::iterator p = in->xattrs.begin(); - p != in->xattrs.end(); - ++p) { - memcpy(name, p->first.c_str(), p->first.length()); - name += p->first.length(); - *name = '\0'; - name++; - } - if (vxattrs) { - for (int i = 0; !vxattrs[i].name.empty(); i++) { - const VXattr& vxattr = vxattrs[i]; - if (vxattr.hidden) - continue; - // call pointer-to-member function - if(vxattr.exists_cb && !(this->*(vxattr.exists_cb))(in)) - continue; - memcpy(name, vxattr.name.c_str(), vxattr.name.length()); - name += vxattr.name.length(); - *name = '\0'; - name++; - } - } - } else - r = -ERANGE; - } - } - ldout(cct, 3) << "_listxattr(" << in->ino << ", " << size << ") = " << r << dendl; - return r; -} - -int Client::ll_listxattr(Inode *in, char *names, size_t size, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vino = _get_vino(in); - - ldout(cct, 3) << "ll_listxattr " << vino << " size " << size << dendl; - tout(cct) << "ll_listxattr" << std::endl; - tout(cct) << vino.ino.val << std::endl; - tout(cct) << size << std::endl; - - return _listxattr(in, names, size, perms); -} - -int Client::_do_setxattr(Inode *in, const char *name, const void *value, - size_t size, int flags, const UserPerm& perms) -{ - - int xattr_flags = 0; - if (!value) - xattr_flags |= CEPH_XATTR_REMOVE; - if (flags & XATTR_CREATE) - xattr_flags |= CEPH_XATTR_CREATE; - if (flags & XATTR_REPLACE) - xattr_flags |= CEPH_XATTR_REPLACE; - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_SETXATTR); - filepath path; - in->make_nosnap_relative_path(path); - req->set_filepath(path); - req->set_string2(name); - req->set_inode(in); - req->head.args.setxattr.flags = xattr_flags; - - bufferlist bl; - bl.append((const char*)value, size); - req->set_data(bl); - - int res = make_request(req, perms); - - trim_cache(); - ldout(cct, 3) << "_setxattr(" << in->ino << ", \"" << name << "\") = " << - res << dendl; - return res; -} - -int Client::_setxattr(Inode *in, const char *name, const void *value, - size_t size, int flags, const UserPerm& perms) -{ - if (in->snapid != CEPH_NOSNAP) { - return -EROFS; - } - - bool posix_acl_xattr = false; - if (acl_type == POSIX_ACL) - posix_acl_xattr = !strncmp(name, "system.", 7); - - if (strncmp(name, "user.", 5) && - strncmp(name, "security.", 9) && - strncmp(name, "trusted.", 8) && - strncmp(name, "ceph.", 5) && - !posix_acl_xattr) - return -EOPNOTSUPP; - - if (posix_acl_xattr) { - if (!strcmp(name, ACL_EA_ACCESS)) { - mode_t new_mode = in->mode; - if (value) { - int ret = posix_acl_equiv_mode(value, size, &new_mode); - if (ret < 0) - return ret; - if (ret == 0) { - value = NULL; - size = 0; - } - if (new_mode != in->mode) { - struct ceph_statx stx; - stx.stx_mode = new_mode; - ret = _do_setattr(in, &stx, CEPH_SETATTR_MODE, perms, NULL); - if (ret < 0) - return ret; - } - } - } else if (!strcmp(name, ACL_EA_DEFAULT)) { - if (value) { - if (!S_ISDIR(in->mode)) - return -EACCES; - int ret = posix_acl_check(value, size); - if (ret < 0) - return -EINVAL; - if (ret == 0) { - value = NULL; - size = 0; - } - } - } else { - return -EOPNOTSUPP; - } - } else { - const VXattr *vxattr = _match_vxattr(in, name); - if (vxattr && vxattr->readonly) - return -EOPNOTSUPP; - } - - return _do_setxattr(in, name, value, size, flags, perms); -} - -int Client::_setxattr(InodeRef &in, const char *name, const void *value, - size_t size, int flags, const UserPerm& perms) -{ - if (cct->_conf->client_permissions) { - int r = xattr_permission(in.get(), name, MAY_WRITE, perms); - if (r < 0) - return r; - } - return _setxattr(in.get(), name, value, size, flags, perms); -} - -int Client::_setxattr_check_data_pool(string& name, string& value, const OSDMap *osdmap) -{ - string tmp; - if (name == "layout") { - string::iterator begin = value.begin(); - string::iterator end = value.end(); - keys_and_values p; // create instance of parser - std::map m; // map to receive results - if (!qi::parse(begin, end, p, m)) { // returns true if successful - return -EINVAL; - } - if (begin != end) - return -EINVAL; - for (map::iterator q = m.begin(); q != m.end(); ++q) { - if (q->first == "pool") { - tmp = q->second; - break; - } - } - } else if (name == "layout.pool") { - tmp = value; - } - - if (tmp.length()) { - int64_t pool; - try { - pool = boost::lexical_cast(tmp); - if (!osdmap->have_pg_pool(pool)) - return -ENOENT; - } catch (boost::bad_lexical_cast const&) { - pool = osdmap->lookup_pg_pool_name(tmp); - if (pool < 0) { - return -ENOENT; - } - } - } - - return 0; -} - -void Client::_setxattr_maybe_wait_for_osdmap(const char *name, const void *value, size_t size) -{ - // For setting pool of layout, MetaRequest need osdmap epoch. - // There is a race which create a new data pool but client and mds both don't have. - // Make client got the latest osdmap which make mds quickly judge whether get newer osdmap. - if (strcmp(name, "ceph.file.layout.pool") == 0 || strcmp(name, "ceph.dir.layout.pool") == 0 || - strcmp(name, "ceph.file.layout") == 0 || strcmp(name, "ceph.dir.layout") == 0) { - string rest(strstr(name, "layout")); - string v((const char*)value, size); - int r = objecter->with_osdmap([&](const OSDMap& o) { - return _setxattr_check_data_pool(rest, v, &o); - }); - - if (r == -ENOENT) { - C_SaferCond ctx; - objecter->wait_for_latest_osdmap(&ctx); - ctx.wait(); - } - } -} - -int Client::ll_setxattr(Inode *in, const char *name, const void *value, - size_t size, int flags, const UserPerm& perms) -{ - _setxattr_maybe_wait_for_osdmap(name, value, size); - - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vino = _get_vino(in); - - ldout(cct, 3) << "ll_setxattr " << vino << " " << name << " size " << size << dendl; - tout(cct) << "ll_setxattr" << std::endl; - tout(cct) << vino.ino.val << std::endl; - tout(cct) << name << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = xattr_permission(in, name, MAY_WRITE, perms); - if (r < 0) - return r; - } - return _setxattr(in, name, value, size, flags, perms); -} - -int Client::_removexattr(Inode *in, const char *name, const UserPerm& perms) -{ - if (in->snapid != CEPH_NOSNAP) { - return -EROFS; - } - - // same xattrs supported by kernel client - if (strncmp(name, "user.", 5) && - strncmp(name, "system.", 7) && - strncmp(name, "security.", 9) && - strncmp(name, "trusted.", 8) && - strncmp(name, "ceph.", 5)) - return -EOPNOTSUPP; - - const VXattr *vxattr = _match_vxattr(in, name); - if (vxattr && vxattr->readonly) - return -EOPNOTSUPP; - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_RMXATTR); - filepath path; - in->make_nosnap_relative_path(path); - req->set_filepath(path); - req->set_filepath2(name); - req->set_inode(in); - - int res = make_request(req, perms); - - trim_cache(); - ldout(cct, 3) << "_removexattr(" << in->ino << ", \"" << name << "\") = " << res << dendl; - return res; -} - -int Client::_removexattr(InodeRef &in, const char *name, const UserPerm& perms) -{ - if (cct->_conf->client_permissions) { - int r = xattr_permission(in.get(), name, MAY_WRITE, perms); - if (r < 0) - return r; - } - return _removexattr(in.get(), name, perms); -} - -int Client::ll_removexattr(Inode *in, const char *name, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vino = _get_vino(in); - - ldout(cct, 3) << "ll_removexattr " << vino << " " << name << dendl; - tout(cct) << "ll_removexattr" << std::endl; - tout(cct) << vino.ino.val << std::endl; - tout(cct) << name << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = xattr_permission(in, name, MAY_WRITE, perms); - if (r < 0) - return r; - } - - return _removexattr(in, name, perms); -} - -bool Client::_vxattrcb_quota_exists(Inode *in) -{ - return in->quota.is_enable(); -} -size_t Client::_vxattrcb_quota(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, - "max_bytes=%lld max_files=%lld", - (long long int)in->quota.max_bytes, - (long long int)in->quota.max_files); -} -size_t Client::_vxattrcb_quota_max_bytes(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%lld", (long long int)in->quota.max_bytes); -} -size_t Client::_vxattrcb_quota_max_files(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%lld", (long long int)in->quota.max_files); -} - -bool Client::_vxattrcb_layout_exists(Inode *in) -{ - return in->layout != file_layout_t(); -} -size_t Client::_vxattrcb_layout(Inode *in, char *val, size_t size) -{ - int r = snprintf(val, size, - "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=", - (unsigned long long)in->layout.stripe_unit, - (unsigned long long)in->layout.stripe_count, - (unsigned long long)in->layout.object_size); - objecter->with_osdmap([&](const OSDMap& o) { - if (o.have_pg_pool(in->layout.pool_id)) - r += snprintf(val + r, size - r, "%s", - o.get_pool_name(in->layout.pool_id).c_str()); - else - r += snprintf(val + r, size - r, "%" PRIu64, - (uint64_t)in->layout.pool_id); - }); - if (in->layout.pool_ns.length()) - r += snprintf(val + r, size - r, " pool_namespace=%s", - in->layout.pool_ns.c_str()); - return r; -} -size_t Client::_vxattrcb_layout_stripe_unit(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%lld", (unsigned long long)in->layout.stripe_unit); -} -size_t Client::_vxattrcb_layout_stripe_count(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%lld", (unsigned long long)in->layout.stripe_count); -} -size_t Client::_vxattrcb_layout_object_size(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%lld", (unsigned long long)in->layout.object_size); -} -size_t Client::_vxattrcb_layout_pool(Inode *in, char *val, size_t size) -{ - size_t r; - objecter->with_osdmap([&](const OSDMap& o) { - if (o.have_pg_pool(in->layout.pool_id)) - r = snprintf(val, size, "%s", o.get_pool_name( - in->layout.pool_id).c_str()); - else - r = snprintf(val, size, "%" PRIu64, (uint64_t)in->layout.pool_id); - }); - return r; -} -size_t Client::_vxattrcb_layout_pool_namespace(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%s", in->layout.pool_ns.c_str()); -} -size_t Client::_vxattrcb_dir_entries(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%lld", (unsigned long long)(in->dirstat.nfiles + in->dirstat.nsubdirs)); -} -size_t Client::_vxattrcb_dir_files(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%lld", (unsigned long long)in->dirstat.nfiles); -} -size_t Client::_vxattrcb_dir_subdirs(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%lld", (unsigned long long)in->dirstat.nsubdirs); -} -size_t Client::_vxattrcb_dir_rentries(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%lld", (unsigned long long)(in->rstat.rfiles + in->rstat.rsubdirs)); -} -size_t Client::_vxattrcb_dir_rfiles(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%lld", (unsigned long long)in->rstat.rfiles); -} -size_t Client::_vxattrcb_dir_rsubdirs(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%lld", (unsigned long long)in->rstat.rsubdirs); -} -size_t Client::_vxattrcb_dir_rbytes(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%lld", (unsigned long long)in->rstat.rbytes); -} -size_t Client::_vxattrcb_dir_rctime(Inode *in, char *val, size_t size) -{ - return snprintf(val, size, "%ld.09%ld", (long)in->rstat.rctime.sec(), - (long)in->rstat.rctime.nsec()); -} - -#define CEPH_XATTR_NAME(_type, _name) "ceph." #_type "." #_name -#define CEPH_XATTR_NAME2(_type, _name, _name2) "ceph." #_type "." #_name "." #_name2 - -#define XATTR_NAME_CEPH(_type, _name) \ -{ \ - name: CEPH_XATTR_NAME(_type, _name), \ - getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \ - readonly: true, \ - hidden: false, \ - exists_cb: NULL, \ -} -#define XATTR_LAYOUT_FIELD(_type, _name, _field) \ -{ \ - name: CEPH_XATTR_NAME2(_type, _name, _field), \ - getxattr_cb: &Client::_vxattrcb_ ## _name ## _ ## _field, \ - readonly: false, \ - hidden: true, \ - exists_cb: &Client::_vxattrcb_layout_exists, \ -} -#define XATTR_QUOTA_FIELD(_type, _name) \ -{ \ - name: CEPH_XATTR_NAME(_type, _name), \ - getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \ - readonly: false, \ - hidden: true, \ - exists_cb: &Client::_vxattrcb_quota_exists, \ -} - -const Client::VXattr Client::_dir_vxattrs[] = { - { - name: "ceph.dir.layout", - getxattr_cb: &Client::_vxattrcb_layout, - readonly: false, - hidden: true, - exists_cb: &Client::_vxattrcb_layout_exists, - }, - XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), - XATTR_LAYOUT_FIELD(dir, layout, stripe_count), - XATTR_LAYOUT_FIELD(dir, layout, object_size), - XATTR_LAYOUT_FIELD(dir, layout, pool), - XATTR_LAYOUT_FIELD(dir, layout, pool_namespace), - XATTR_NAME_CEPH(dir, entries), - XATTR_NAME_CEPH(dir, files), - XATTR_NAME_CEPH(dir, subdirs), - XATTR_NAME_CEPH(dir, rentries), - XATTR_NAME_CEPH(dir, rfiles), - XATTR_NAME_CEPH(dir, rsubdirs), - XATTR_NAME_CEPH(dir, rbytes), - XATTR_NAME_CEPH(dir, rctime), - { - name: "ceph.quota", - getxattr_cb: &Client::_vxattrcb_quota, - readonly: false, - hidden: true, - exists_cb: &Client::_vxattrcb_quota_exists, - }, - XATTR_QUOTA_FIELD(quota, max_bytes), - XATTR_QUOTA_FIELD(quota, max_files), - { name: "" } /* Required table terminator */ -}; - -const Client::VXattr Client::_file_vxattrs[] = { - { - name: "ceph.file.layout", - getxattr_cb: &Client::_vxattrcb_layout, - readonly: false, - hidden: true, - exists_cb: &Client::_vxattrcb_layout_exists, - }, - XATTR_LAYOUT_FIELD(file, layout, stripe_unit), - XATTR_LAYOUT_FIELD(file, layout, stripe_count), - XATTR_LAYOUT_FIELD(file, layout, object_size), - XATTR_LAYOUT_FIELD(file, layout, pool), - XATTR_LAYOUT_FIELD(file, layout, pool_namespace), - { name: "" } /* Required table terminator */ -}; - -const Client::VXattr *Client::_get_vxattrs(Inode *in) -{ - if (in->is_dir()) - return _dir_vxattrs; - else if (in->is_file()) - return _file_vxattrs; - return NULL; -} - -const Client::VXattr *Client::_match_vxattr(Inode *in, const char *name) -{ - if (strncmp(name, "ceph.", 5) == 0) { - const VXattr *vxattr = _get_vxattrs(in); - if (vxattr) { - while (!vxattr->name.empty()) { - if (vxattr->name == name) - return vxattr; - vxattr++; - } - } - } - return NULL; -} - -size_t Client::_vxattrs_calcu_name_size(const VXattr *vxattr) -{ - size_t len = 0; - while (!vxattr->name.empty()) { - if (!vxattr->hidden) - len += vxattr->name.length() + 1; - vxattr++; - } - return len; -} - -int Client::ll_readlink(Inode *in, char *buf, size_t buflen, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vino = _get_vino(in); - - ldout(cct, 3) << "ll_readlink " << vino << dendl; - tout(cct) << "ll_readlink" << std::endl; - tout(cct) << vino.ino.val << std::endl; - - set::iterator dn = in->dn_set.begin(); - while (dn != in->dn_set.end()) { - touch_dn(*dn); - ++dn; - } - - int r = _readlink(in, buf, buflen); // FIXME: no permission checking! - ldout(cct, 3) << "ll_readlink " << vino << " = " << r << dendl; - return r; -} - -int Client::_mknod(Inode *dir, const char *name, mode_t mode, dev_t rdev, - const UserPerm& perms, InodeRef *inp) -{ - ldout(cct, 3) << "_mknod(" << dir->ino << " " << name << ", 0" << oct - << mode << dec << ", " << rdev << ", uid " << perms.uid() - << ", gid " << perms.gid() << ")" << dendl; - - if (strlen(name) > NAME_MAX) - return -ENAMETOOLONG; - - if (dir->snapid != CEPH_NOSNAP) { - return -EROFS; - } - if (is_quota_files_exceeded(dir, perms)) { - return -EDQUOT; - } - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_MKNOD); - - filepath path; - dir->make_nosnap_relative_path(path); - path.push_dentry(name); - req->set_filepath(path); - req->set_inode(dir); - req->head.args.mknod.rdev = rdev; - req->dentry_drop = CEPH_CAP_FILE_SHARED; - req->dentry_unless = CEPH_CAP_FILE_EXCL; - - bufferlist xattrs_bl; - int res = _posix_acl_create(dir, &mode, xattrs_bl, perms); - if (res < 0) - goto fail; - req->head.args.mknod.mode = mode; - if (xattrs_bl.length() > 0) - req->set_data(xattrs_bl); - - Dentry *de; - res = get_or_create(dir, name, &de); - if (res < 0) - goto fail; - req->set_dentry(de); - - res = make_request(req, perms, inp); - - trim_cache(); - - ldout(cct, 3) << "mknod(" << path << ", 0" << oct << mode << dec << ") = " << res << dendl; - return res; - - fail: - put_request(req); - return res; -} - -int Client::ll_mknod(Inode *parent, const char *name, mode_t mode, - dev_t rdev, struct stat *attr, Inode **out, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vparent = _get_vino(parent); - - ldout(cct, 3) << "ll_mknod " << vparent << " " << name << dendl; - tout(cct) << "ll_mknod" << std::endl; - tout(cct) << vparent.ino.val << std::endl; - tout(cct) << name << std::endl; - tout(cct) << mode << std::endl; - tout(cct) << rdev << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = may_create(parent, perms); - if (r < 0) - return r; - } - - InodeRef in; - int r = _mknod(parent, name, mode, rdev, perms, &in); - if (r == 0) { - fill_stat(in, attr); - _ll_get(in.get()); - } - tout(cct) << attr->st_ino << std::endl; - ldout(cct, 3) << "ll_mknod " << vparent << " " << name - << " = " << r << " (" << hex << attr->st_ino << dec << ")" << dendl; - *out = in.get(); - return r; -} - -int Client::ll_mknodx(Inode *parent, const char *name, mode_t mode, - dev_t rdev, Inode **out, - struct ceph_statx *stx, unsigned want, unsigned flags, - const UserPerm& perms) -{ - unsigned caps = statx_to_mask(flags, want); - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vparent = _get_vino(parent); - - ldout(cct, 3) << "ll_mknodx " << vparent << " " << name << dendl; - tout(cct) << "ll_mknodx" << std::endl; - tout(cct) << vparent.ino.val << std::endl; - tout(cct) << name << std::endl; - tout(cct) << mode << std::endl; - tout(cct) << rdev << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = may_create(parent, perms); - if (r < 0) - return r; - } - - InodeRef in; - int r = _mknod(parent, name, mode, rdev, perms, &in); - if (r == 0) { - fill_statx(in, caps, stx); - _ll_get(in.get()); - } - tout(cct) << stx->stx_ino << std::endl; - ldout(cct, 3) << "ll_mknodx " << vparent << " " << name - << " = " << r << " (" << hex << stx->stx_ino << dec << ")" << dendl; - *out = in.get(); - return r; -} - -int Client::_create(Inode *dir, const char *name, int flags, mode_t mode, - InodeRef *inp, Fh **fhp, int stripe_unit, int stripe_count, - int object_size, const char *data_pool, bool *created, - const UserPerm& perms) -{ - ldout(cct, 3) << "_create(" << dir->ino << " " << name << ", 0" << oct << - mode << dec << ")" << dendl; - - if (strlen(name) > NAME_MAX) - return -ENAMETOOLONG; - if (dir->snapid != CEPH_NOSNAP) { - return -EROFS; - } - if (is_quota_files_exceeded(dir, perms)) { - return -EDQUOT; - } - - // use normalized flags to generate cmode - int cmode = ceph_flags_to_mode(ceph_flags_sys2wire(flags)); - if (cmode < 0) - return -EINVAL; - - int64_t pool_id = -1; - if (data_pool && *data_pool) { - pool_id = objecter->with_osdmap( - std::mem_fn(&OSDMap::lookup_pg_pool_name), data_pool); - if (pool_id < 0) - return -EINVAL; - if (pool_id > 0xffffffffll) - return -ERANGE; // bummer! - } - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_CREATE); - - filepath path; - dir->make_nosnap_relative_path(path); - path.push_dentry(name); - req->set_filepath(path); - req->set_inode(dir); - req->head.args.open.flags = ceph_flags_sys2wire(flags | O_CREAT); - - req->head.args.open.stripe_unit = stripe_unit; - req->head.args.open.stripe_count = stripe_count; - req->head.args.open.object_size = object_size; - if (cct->_conf->client_debug_getattr_caps) - req->head.args.open.mask = DEBUG_GETATTR_CAPS; - else - req->head.args.open.mask = 0; - req->head.args.open.pool = pool_id; - req->dentry_drop = CEPH_CAP_FILE_SHARED; - req->dentry_unless = CEPH_CAP_FILE_EXCL; - - mode |= S_IFREG; - bufferlist xattrs_bl; - int res = _posix_acl_create(dir, &mode, xattrs_bl, perms); - if (res < 0) - goto fail; - req->head.args.open.mode = mode; - if (xattrs_bl.length() > 0) - req->set_data(xattrs_bl); - - Dentry *de; - res = get_or_create(dir, name, &de); - if (res < 0) - goto fail; - req->set_dentry(de); - - res = make_request(req, perms, inp, created); - if (res < 0) { - goto reply_error; - } - - /* If the caller passed a value in fhp, do the open */ - if(fhp) { - (*inp)->get_open_ref(cmode); - *fhp = _create_fh(inp->get(), flags, cmode, perms); - } - - reply_error: - trim_cache(); - - ldout(cct, 3) << "create(" << path << ", 0" << oct << mode << dec - << " layout " << stripe_unit - << ' ' << stripe_count - << ' ' << object_size - <<") = " << res << dendl; - return res; - - fail: - put_request(req); - return res; -} - - -int Client::_mkdir(Inode *dir, const char *name, mode_t mode, const UserPerm& perm, - InodeRef *inp) -{ - ldout(cct, 3) << "_mkdir(" << dir->ino << " " << name << ", 0" << oct - << mode << dec << ", uid " << perm.uid() - << ", gid " << perm.gid() << ")" << dendl; - - if (strlen(name) > NAME_MAX) - return -ENAMETOOLONG; - - if (dir->snapid != CEPH_NOSNAP && dir->snapid != CEPH_SNAPDIR) { - return -EROFS; - } - if (is_quota_files_exceeded(dir, perm)) { - return -EDQUOT; - } - MetaRequest *req = new MetaRequest(dir->snapid == CEPH_SNAPDIR ? - CEPH_MDS_OP_MKSNAP : CEPH_MDS_OP_MKDIR); - - filepath path; - dir->make_nosnap_relative_path(path); - path.push_dentry(name); - req->set_filepath(path); - req->set_inode(dir); - req->dentry_drop = CEPH_CAP_FILE_SHARED; - req->dentry_unless = CEPH_CAP_FILE_EXCL; - - mode |= S_IFDIR; - bufferlist xattrs_bl; - int res = _posix_acl_create(dir, &mode, xattrs_bl, perm); - if (res < 0) - goto fail; - req->head.args.mkdir.mode = mode; - if (xattrs_bl.length() > 0) - req->set_data(xattrs_bl); - - Dentry *de; - res = get_or_create(dir, name, &de); - if (res < 0) - goto fail; - req->set_dentry(de); - - ldout(cct, 10) << "_mkdir: making request" << dendl; - res = make_request(req, perm, inp); - ldout(cct, 10) << "_mkdir result is " << res << dendl; - - trim_cache(); - - ldout(cct, 3) << "_mkdir(" << path << ", 0" << oct << mode << dec << ") = " << res << dendl; - return res; - - fail: - put_request(req); - return res; -} - -int Client::ll_mkdir(Inode *parent, const char *name, mode_t mode, - struct stat *attr, Inode **out, const UserPerm& perm) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vparent = _get_vino(parent); - - ldout(cct, 3) << "ll_mkdir " << vparent << " " << name << dendl; - tout(cct) << "ll_mkdir" << std::endl; - tout(cct) << vparent.ino.val << std::endl; - tout(cct) << name << std::endl; - tout(cct) << mode << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = may_create(parent, perm); - if (r < 0) - return r; - } - - InodeRef in; - int r = _mkdir(parent, name, mode, perm, &in); - if (r == 0) { - fill_stat(in, attr); - _ll_get(in.get()); - } - tout(cct) << attr->st_ino << std::endl; - ldout(cct, 3) << "ll_mkdir " << vparent << " " << name - << " = " << r << " (" << hex << attr->st_ino << dec << ")" << dendl; - *out = in.get(); - return r; -} - -int Client::ll_mkdirx(Inode *parent, const char *name, mode_t mode, Inode **out, - struct ceph_statx *stx, unsigned want, unsigned flags, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vparent = _get_vino(parent); - - ldout(cct, 3) << "ll_mkdirx " << vparent << " " << name << dendl; - tout(cct) << "ll_mkdirx" << std::endl; - tout(cct) << vparent.ino.val << std::endl; - tout(cct) << name << std::endl; - tout(cct) << mode << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = may_create(parent, perms); - if (r < 0) - return r; - } - - InodeRef in; - int r = _mkdir(parent, name, mode, perms, &in); - if (r == 0) { - fill_statx(in, statx_to_mask(flags, want), stx); - _ll_get(in.get()); - } else { - stx->stx_ino = 0; - stx->stx_mask = 0; - } - tout(cct) << stx->stx_ino << std::endl; - ldout(cct, 3) << "ll_mkdirx " << vparent << " " << name - << " = " << r << " (" << hex << stx->stx_ino << dec << ")" << dendl; - *out = in.get(); - return r; -} - -int Client::_symlink(Inode *dir, const char *name, const char *target, - const UserPerm& perms, InodeRef *inp) -{ - ldout(cct, 3) << "_symlink(" << dir->ino << " " << name << ", " << target - << ", uid " << perms.uid() << ", gid " << perms.gid() << ")" - << dendl; - - if (strlen(name) > NAME_MAX) - return -ENAMETOOLONG; - - if (dir->snapid != CEPH_NOSNAP) { - return -EROFS; - } - if (is_quota_files_exceeded(dir, perms)) { - return -EDQUOT; - } - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_SYMLINK); - - filepath path; - dir->make_nosnap_relative_path(path); - path.push_dentry(name); - req->set_filepath(path); - req->set_inode(dir); - req->set_string2(target); - req->dentry_drop = CEPH_CAP_FILE_SHARED; - req->dentry_unless = CEPH_CAP_FILE_EXCL; - - Dentry *de; - int res = get_or_create(dir, name, &de); - if (res < 0) - goto fail; - req->set_dentry(de); - - res = make_request(req, perms, inp); - - trim_cache(); - ldout(cct, 3) << "_symlink(\"" << path << "\", \"" << target << "\") = " << - res << dendl; - return res; - - fail: - put_request(req); - return res; -} - -int Client::ll_symlink(Inode *parent, const char *name, const char *value, - struct stat *attr, Inode **out, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vparent = _get_vino(parent); - - ldout(cct, 3) << "ll_symlink " << vparent << " " << name << " -> " << value - << dendl; - tout(cct) << "ll_symlink" << std::endl; - tout(cct) << vparent.ino.val << std::endl; - tout(cct) << name << std::endl; - tout(cct) << value << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = may_create(parent, perms); - if (r < 0) - return r; - } - - InodeRef in; - int r = _symlink(parent, name, value, perms, &in); - if (r == 0) { - fill_stat(in, attr); - _ll_get(in.get()); - } - tout(cct) << attr->st_ino << std::endl; - ldout(cct, 3) << "ll_symlink " << vparent << " " << name - << " = " << r << " (" << hex << attr->st_ino << dec << ")" << dendl; - *out = in.get(); - return r; -} - -int Client::ll_symlinkx(Inode *parent, const char *name, const char *value, - Inode **out, struct ceph_statx *stx, unsigned want, - unsigned flags, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vparent = _get_vino(parent); - - ldout(cct, 3) << "ll_symlinkx " << vparent << " " << name << " -> " << value - << dendl; - tout(cct) << "ll_symlinkx" << std::endl; - tout(cct) << vparent.ino.val << std::endl; - tout(cct) << name << std::endl; - tout(cct) << value << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = may_create(parent, perms); - if (r < 0) - return r; - } - - InodeRef in; - int r = _symlink(parent, name, value, perms, &in); - if (r == 0) { - fill_statx(in, statx_to_mask(flags, want), stx); - _ll_get(in.get()); - } - tout(cct) << stx->stx_ino << std::endl; - ldout(cct, 3) << "ll_symlinkx " << vparent << " " << name - << " = " << r << " (" << hex << stx->stx_ino << dec << ")" << dendl; - *out = in.get(); - return r; -} - -int Client::_unlink(Inode *dir, const char *name, const UserPerm& perm) -{ - ldout(cct, 3) << "_unlink(" << dir->ino << " " << name - << " uid " << perm.uid() << " gid " << perm.gid() - << ")" << dendl; - - if (dir->snapid != CEPH_NOSNAP) { - return -EROFS; - } - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_UNLINK); - - filepath path; - dir->make_nosnap_relative_path(path); - path.push_dentry(name); - req->set_filepath(path); - - InodeRef otherin; - - Dentry *de; - int res = get_or_create(dir, name, &de); - if (res < 0) - goto fail; - req->set_dentry(de); - req->dentry_drop = CEPH_CAP_FILE_SHARED; - req->dentry_unless = CEPH_CAP_FILE_EXCL; - - res = _lookup(dir, name, 0, &otherin, perm); - if (res < 0) - goto fail; - req->set_other_inode(otherin.get()); - req->other_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; - - req->set_inode(dir); - - res = make_request(req, perm); - - trim_cache(); - ldout(cct, 3) << "unlink(" << path << ") = " << res << dendl; - return res; - - fail: - put_request(req); - return res; -} - -int Client::ll_unlink(Inode *in, const char *name, const UserPerm& perm) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vino = _get_vino(in); - - ldout(cct, 3) << "ll_unlink " << vino << " " << name << dendl; - tout(cct) << "ll_unlink" << std::endl; - tout(cct) << vino.ino.val << std::endl; - tout(cct) << name << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = may_delete(in, name, perm); - if (r < 0) - return r; - } - return _unlink(in, name, perm); -} - -int Client::_rmdir(Inode *dir, const char *name, const UserPerm& perms) -{ - ldout(cct, 3) << "_rmdir(" << dir->ino << " " << name << " uid " - << perms.uid() << " gid " << perms.gid() << ")" << dendl; - - if (dir->snapid != CEPH_NOSNAP && dir->snapid != CEPH_SNAPDIR) { - return -EROFS; - } - - MetaRequest *req = new MetaRequest(dir->snapid == CEPH_SNAPDIR ? CEPH_MDS_OP_RMSNAP:CEPH_MDS_OP_RMDIR); - filepath path; - dir->make_nosnap_relative_path(path); - path.push_dentry(name); - req->set_filepath(path); - - req->dentry_drop = CEPH_CAP_FILE_SHARED; - req->dentry_unless = CEPH_CAP_FILE_EXCL; - req->other_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; - - InodeRef in; - - Dentry *de; - int res = get_or_create(dir, name, &de); - if (res < 0) - goto fail; - res = _lookup(dir, name, 0, &in, perms); - if (res < 0) - goto fail; - if (req->get_op() == CEPH_MDS_OP_RMDIR) { - req->set_inode(dir); - req->set_dentry(de); - req->set_other_inode(in.get()); - } else { - unlink(de, true, true); - req->set_other_inode(in.get()); - } - - res = make_request(req, perms); - - trim_cache(); - ldout(cct, 3) << "rmdir(" << path << ") = " << res << dendl; - return res; - - fail: - put_request(req); - return res; -} - -int Client::ll_rmdir(Inode *in, const char *name, const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vino = _get_vino(in); - - ldout(cct, 3) << "ll_rmdir " << vino << " " << name << dendl; - tout(cct) << "ll_rmdir" << std::endl; - tout(cct) << vino.ino.val << std::endl; - tout(cct) << name << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = may_delete(in, name, perms); - if (r < 0) - return r; - } - - return _rmdir(in, name, perms); -} - -int Client::_rename(Inode *fromdir, const char *fromname, Inode *todir, const char *toname, const UserPerm& perm) -{ - ldout(cct, 3) << "_rename(" << fromdir->ino << " " << fromname << " to " - << todir->ino << " " << toname - << " uid " << perm.uid() << " gid " << perm.gid() << ")" - << dendl; - - if (fromdir->snapid != todir->snapid) - return -EXDEV; - - int op = CEPH_MDS_OP_RENAME; - if (fromdir->snapid != CEPH_NOSNAP) { - if (fromdir == todir && fromdir->snapid == CEPH_SNAPDIR) - op = CEPH_MDS_OP_RENAMESNAP; - else - return -EROFS; - } - if (fromdir != todir) { - Inode *fromdir_root = - fromdir->quota.is_enable() ? fromdir : get_quota_root(fromdir, perm); - Inode *todir_root = - todir->quota.is_enable() ? todir : get_quota_root(todir, perm); - if (fromdir_root != todir_root) { - return -EXDEV; - } - } - - InodeRef target; - MetaRequest *req = new MetaRequest(op); - - filepath from; - fromdir->make_nosnap_relative_path(from); - from.push_dentry(fromname); - filepath to; - todir->make_nosnap_relative_path(to); - to.push_dentry(toname); - req->set_filepath(to); - req->set_filepath2(from); - - Dentry *oldde; - int res = get_or_create(fromdir, fromname, &oldde); - if (res < 0) - goto fail; - Dentry *de; - res = get_or_create(todir, toname, &de); - if (res < 0) - goto fail; - - if (op == CEPH_MDS_OP_RENAME) { - req->set_old_dentry(oldde); - req->old_dentry_drop = CEPH_CAP_FILE_SHARED; - req->old_dentry_unless = CEPH_CAP_FILE_EXCL; - - req->set_dentry(de); - req->dentry_drop = CEPH_CAP_FILE_SHARED; - req->dentry_unless = CEPH_CAP_FILE_EXCL; - - InodeRef oldin, otherin; - res = _lookup(fromdir, fromname, 0, &oldin, perm); - if (res < 0) - goto fail; - req->set_old_inode(oldin.get()); - req->old_inode_drop = CEPH_CAP_LINK_SHARED; - - res = _lookup(todir, toname, 0, &otherin, perm); - if (res != 0 && res != -ENOENT) { - goto fail; - } else if (res == 0) { - req->set_other_inode(otherin.get()); - req->other_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; - } - - req->set_inode(todir); - } else { - // renamesnap reply contains no tracedn, so we need to invalidate - // dentry manually - unlink(oldde, true, true); - unlink(de, true, true); - } - - res = make_request(req, perm, &target); - ldout(cct, 10) << "rename result is " << res << dendl; - - // renamed item from our cache - - trim_cache(); - ldout(cct, 3) << "_rename(" << from << ", " << to << ") = " << res << dendl; - return res; - - fail: - put_request(req); - return res; -} - -int Client::ll_rename(Inode *parent, const char *name, Inode *newparent, - const char *newname, const UserPerm& perm) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vparent = _get_vino(parent); - vinodeno_t vnewparent = _get_vino(newparent); - - ldout(cct, 3) << "ll_rename " << vparent << " " << name << " to " - << vnewparent << " " << newname << dendl; - tout(cct) << "ll_rename" << std::endl; - tout(cct) << vparent.ino.val << std::endl; - tout(cct) << name << std::endl; - tout(cct) << vnewparent.ino.val << std::endl; - tout(cct) << newname << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = may_delete(parent, name, perm); - if (r < 0) - return r; - r = may_delete(newparent, newname, perm); - if (r < 0 && r != -ENOENT) - return r; - } - - return _rename(parent, name, newparent, newname, perm); -} - -int Client::_link(Inode *in, Inode *dir, const char *newname, const UserPerm& perm, InodeRef *inp) -{ - ldout(cct, 3) << "_link(" << in->ino << " to " << dir->ino << " " << newname - << " uid " << perm.uid() << " gid " << perm.gid() << ")" << dendl; - - if (strlen(newname) > NAME_MAX) - return -ENAMETOOLONG; - - if (in->snapid != CEPH_NOSNAP || dir->snapid != CEPH_NOSNAP) { - return -EROFS; - } - if (is_quota_files_exceeded(dir, perm)) { - return -EDQUOT; - } - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LINK); - - filepath path(newname, dir->ino); - req->set_filepath(path); - filepath existing(in->ino); - req->set_filepath2(existing); - - req->set_inode(dir); - req->inode_drop = CEPH_CAP_FILE_SHARED; - req->inode_unless = CEPH_CAP_FILE_EXCL; - - Dentry *de; - int res = get_or_create(dir, newname, &de); - if (res < 0) - goto fail; - req->set_dentry(de); - - res = make_request(req, perm, inp); - ldout(cct, 10) << "link result is " << res << dendl; - - trim_cache(); - ldout(cct, 3) << "link(" << existing << ", " << path << ") = " << res << dendl; - return res; - - fail: - put_request(req); - return res; -} - -int Client::ll_link(Inode *in, Inode *newparent, const char *newname, - const UserPerm& perm) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vino = _get_vino(in); - vinodeno_t vnewparent = _get_vino(newparent); - - ldout(cct, 3) << "ll_link " << vino << " to " << vnewparent << " " << - newname << dendl; - tout(cct) << "ll_link" << std::endl; - tout(cct) << vino.ino.val << std::endl; - tout(cct) << vnewparent << std::endl; - tout(cct) << newname << std::endl; - - int r = 0; - InodeRef target; - - if (!cct->_conf->fuse_default_permissions) { - if (S_ISDIR(in->mode)) - return -EPERM; - - r = may_hardlink(in, perm); - if (r < 0) - return r; - - r = may_create(newparent, perm); - if (r < 0) - return r; - } - - return _link(in, newparent, newname, perm, &target); -} - -int Client::ll_num_osds(void) -{ - Mutex::Locker lock(client_lock); - return objecter->with_osdmap(std::mem_fn(&OSDMap::get_num_osds)); -} - -int Client::ll_osdaddr(int osd, uint32_t *addr) -{ - Mutex::Locker lock(client_lock); - - entity_addr_t g; - bool exists = objecter->with_osdmap([&](const OSDMap& o) { - if (!o.exists(osd)) - return false; - g = o.get_addr(osd); - return true; - }); - if (!exists) - return -1; - uint32_t nb_addr = (g.in4_addr()).sin_addr.s_addr; - *addr = ntohl(nb_addr); - return 0; -} - -uint32_t Client::ll_stripe_unit(Inode *in) -{ - Mutex::Locker lock(client_lock); - return in->layout.stripe_unit; -} - -uint64_t Client::ll_snap_seq(Inode *in) -{ - Mutex::Locker lock(client_lock); - return in->snaprealm->seq; -} - -int Client::ll_file_layout(Inode *in, file_layout_t *layout) -{ - Mutex::Locker lock(client_lock); - *layout = in->layout; - return 0; -} - -int Client::ll_file_layout(Fh *fh, file_layout_t *layout) -{ - return ll_file_layout(fh->inode.get(), layout); -} - -/* Currently we cannot take advantage of redundancy in reads, since we - would have to go through all possible placement groups (a - potentially quite large number determined by a hash), and use CRUSH - to calculate the appropriate set of OSDs for each placement group, - then index into that. An array with one entry per OSD is much more - tractable and works for demonstration purposes. */ - -int Client::ll_get_stripe_osd(Inode *in, uint64_t blockno, - file_layout_t* layout) -{ - Mutex::Locker lock(client_lock); - - inodeno_t ino = ll_get_inodeno(in); - uint32_t object_size = layout->object_size; - uint32_t su = layout->stripe_unit; - uint32_t stripe_count = layout->stripe_count; - uint64_t stripes_per_object = object_size / su; - - uint64_t stripeno = blockno / stripe_count; // which horizontal stripe (Y) - uint64_t stripepos = blockno % stripe_count; // which object in the object set (X) - uint64_t objectsetno = stripeno / stripes_per_object; // which object set - uint64_t objectno = objectsetno * stripe_count + stripepos; // object id - - object_t oid = file_object_t(ino, objectno); - return objecter->with_osdmap([&](const OSDMap& o) { - ceph_object_layout olayout = - o.file_to_object_layout(oid, *layout); - pg_t pg = (pg_t)olayout.ol_pgid; - vector osds; - int primary; - o.pg_to_acting_osds(pg, &osds, &primary); - return primary; - }); -} - -/* Return the offset of the block, internal to the object */ - -uint64_t Client::ll_get_internal_offset(Inode *in, uint64_t blockno) -{ - Mutex::Locker lock(client_lock); - file_layout_t *layout=&(in->layout); - uint32_t object_size = layout->object_size; - uint32_t su = layout->stripe_unit; - uint64_t stripes_per_object = object_size / su; - - return (blockno % stripes_per_object) * su; -} - -int Client::ll_opendir(Inode *in, int flags, dir_result_t** dirpp, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vino = _get_vino(in); - - ldout(cct, 3) << "ll_opendir " << vino << dendl; - tout(cct) << "ll_opendir" << std::endl; - tout(cct) << vino.ino.val << std::endl; - - if (!cct->_conf->fuse_default_permissions) { - int r = may_open(in, flags, perms); - if (r < 0) - return r; - } - - int r = _opendir(in, dirpp, perms); - tout(cct) << (unsigned long)*dirpp << std::endl; - - ldout(cct, 3) << "ll_opendir " << vino << " = " << r << " (" << *dirpp << ")" - << dendl; - return r; -} - -int Client::ll_releasedir(dir_result_t *dirp) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "ll_releasedir " << dirp << dendl; - tout(cct) << "ll_releasedir" << std::endl; - tout(cct) << (unsigned long)dirp << std::endl; - - if (unmounting) - return -ENOTCONN; - - _closedir(dirp); - return 0; -} - -int Client::ll_fsyncdir(dir_result_t *dirp) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "ll_fsyncdir " << dirp << dendl; - tout(cct) << "ll_fsyncdir" << std::endl; - tout(cct) << (unsigned long)dirp << std::endl; - - if (unmounting) - return -ENOTCONN; - - return _fsync(dirp->inode.get(), false); -} - -int Client::ll_open(Inode *in, int flags, Fh **fhp, const UserPerm& perms) -{ - assert(!(flags & O_CREAT)); - - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vino = _get_vino(in); - - ldout(cct, 3) << "ll_open " << vino << " " << ceph_flags_sys2wire(flags) << dendl; - tout(cct) << "ll_open" << std::endl; - tout(cct) << vino.ino.val << std::endl; - tout(cct) << ceph_flags_sys2wire(flags) << std::endl; - - int r; - if (!cct->_conf->fuse_default_permissions) { - r = may_open(in, flags, perms); - if (r < 0) - goto out; - } - - r = _open(in, flags, 0, fhp /* may be NULL */, perms); - - out: - Fh *fhptr = fhp ? *fhp : NULL; - if (fhptr) { - ll_unclosed_fh_set.insert(fhptr); - } - tout(cct) << (unsigned long)fhptr << std::endl; - ldout(cct, 3) << "ll_open " << vino << " " << ceph_flags_sys2wire(flags) << - " = " << r << " (" << fhptr << ")" << dendl; - return r; -} - -int Client::_ll_create(Inode *parent, const char *name, mode_t mode, - int flags, InodeRef *in, int caps, Fh **fhp, - const UserPerm& perms) -{ - *fhp = NULL; - - vinodeno_t vparent = _get_vino(parent); - - ldout(cct, 3) << "_ll_create " << vparent << " " << name << " 0" << oct << - mode << dec << " " << ceph_flags_sys2wire(flags) << ", uid " << perms.uid() - << ", gid " << perms.gid() << dendl; - tout(cct) << "ll_create" << std::endl; - tout(cct) << vparent.ino.val << std::endl; - tout(cct) << name << std::endl; - tout(cct) << mode << std::endl; - tout(cct) << ceph_flags_sys2wire(flags) << std::endl; - - bool created = false; - int r = _lookup(parent, name, caps, in, perms); - - if (r == 0 && (flags & O_CREAT) && (flags & O_EXCL)) - return -EEXIST; - - if (r == -ENOENT && (flags & O_CREAT)) { - if (!cct->_conf->fuse_default_permissions) { - r = may_create(parent, perms); - if (r < 0) - goto out; - } - r = _create(parent, name, flags, mode, in, fhp, 0, 0, 0, NULL, &created, - perms); - if (r < 0) - goto out; - } - - if (r < 0) - goto out; - - assert(*in); - - ldout(cct, 20) << "_ll_create created = " << created << dendl; - if (!created) { - if (!cct->_conf->fuse_default_permissions) { - r = may_open(in->get(), flags, perms); - if (r < 0) { - if (*fhp) { - int release_r = _release_fh(*fhp); - assert(release_r == 0); // during create, no async data ops should have happened - } - goto out; - } - } - if (*fhp == NULL) { - r = _open(in->get(), flags, mode, fhp, perms); - if (r < 0) - goto out; - } - } - -out: - if (*fhp) { - ll_unclosed_fh_set.insert(*fhp); - } - - ino_t ino = 0; - if (r >= 0) { - Inode *inode = in->get(); - if (use_faked_inos()) - ino = inode->faked_ino; - else - ino = inode->ino; - } - - tout(cct) << (unsigned long)*fhp << std::endl; - tout(cct) << ino << std::endl; - ldout(cct, 3) << "_ll_create " << vparent << " " << name << " 0" << oct << - mode << dec << " " << ceph_flags_sys2wire(flags) << " = " << r << " (" << - *fhp << " " << hex << ino << dec << ")" << dendl; - - return r; -} - -int Client::ll_create(Inode *parent, const char *name, mode_t mode, - int flags, struct stat *attr, Inode **outp, Fh **fhp, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - InodeRef in; - - if (unmounting) - return -ENOTCONN; - - int r = _ll_create(parent, name, mode, flags, &in, CEPH_STAT_CAP_INODE_ALL, - fhp, perms); - if (r >= 0) { - assert(in); - - // passing an Inode in outp requires an additional ref - if (outp) { - _ll_get(in.get()); - *outp = in.get(); - } - fill_stat(in, attr); - } else { - attr->st_ino = 0; - } - - return r; -} - -int Client::ll_createx(Inode *parent, const char *name, mode_t mode, - int oflags, Inode **outp, Fh **fhp, - struct ceph_statx *stx, unsigned want, unsigned lflags, - const UserPerm& perms) -{ - unsigned caps = statx_to_mask(lflags, want); - Mutex::Locker lock(client_lock); - InodeRef in; - - if (unmounting) - return -ENOTCONN; - - int r = _ll_create(parent, name, mode, oflags, &in, caps, fhp, perms); - if (r >= 0) { - assert(in); - - // passing an Inode in outp requires an additional ref - if (outp) { - _ll_get(in.get()); - *outp = in.get(); - } - fill_statx(in, caps, stx); - } else { - stx->stx_ino = 0; - stx->stx_mask = 0; - } - - return r; -} - -loff_t Client::ll_lseek(Fh *fh, loff_t offset, int whence) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "ll_lseek" << std::endl; - tout(cct) << offset << std::endl; - tout(cct) << whence << std::endl; - - if (unmounting) - return -ENOTCONN; - - return _lseek(fh, offset, whence); -} - -int Client::ll_read(Fh *fh, loff_t off, loff_t len, bufferlist *bl) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "ll_read " << fh << " " << fh->inode->ino << " " << " " << off << "~" << len << dendl; - tout(cct) << "ll_read" << std::endl; - tout(cct) << (unsigned long)fh << std::endl; - tout(cct) << off << std::endl; - tout(cct) << len << std::endl; - - if (unmounting) - return -ENOTCONN; - - return _read(fh, off, len, bl); -} - -int Client::ll_read_block(Inode *in, uint64_t blockid, - char *buf, - uint64_t offset, - uint64_t length, - file_layout_t* layout) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - vinodeno_t vino = ll_get_vino(in); - object_t oid = file_object_t(vino.ino, blockid); - C_SaferCond onfinish; - bufferlist bl; - - objecter->read(oid, - object_locator_t(layout->pool_id), - offset, - length, - vino.snapid, - &bl, - CEPH_OSD_FLAG_READ, - &onfinish); - - client_lock.Unlock(); - int r = onfinish.wait(); - client_lock.Lock(); - - if (r >= 0) { - bl.copy(0, bl.length(), buf); - r = bl.length(); - } - - return r; -} - -/* It appears that the OSD doesn't return success unless the entire - buffer was written, return the write length on success. */ - -int Client::ll_write_block(Inode *in, uint64_t blockid, - char* buf, uint64_t offset, - uint64_t length, file_layout_t* layout, - uint64_t snapseq, uint32_t sync) -{ - Mutex flock("Client::ll_write_block flock"); - vinodeno_t vino = ll_get_vino(in); - Cond cond; - bool done; - int r = 0; - Context *onsafe = nullptr; - - if (length == 0) { - return -EINVAL; - } - if (true || sync) { - /* if write is stable, the epilogue is waiting on - * flock */ - onsafe = new C_SafeCond(&flock, &cond, &done, &r); - done = false; - } else { - /* if write is unstable, we just place a barrier for - * future commits to wait on */ - /*onsafe = new C_Block_Sync(this, vino.ino, - barrier_interval(offset, offset + length), &r); - */ - done = true; - } - object_t oid = file_object_t(vino.ino, blockid); - SnapContext fakesnap; - bufferptr bp; - if (length > 0) bp = buffer::copy(buf, length); - bufferlist bl; - bl.push_back(bp); - - ldout(cct, 1) << "ll_block_write for " << vino.ino << "." << blockid - << dendl; - - fakesnap.seq = snapseq; - - /* lock just in time */ - client_lock.Lock(); - if (unmounting) { - client_lock.Unlock(); - delete onsafe; - return -ENOTCONN; - } - - objecter->write(oid, - object_locator_t(layout->pool_id), - offset, - length, - fakesnap, - bl, - ceph::real_clock::now(), - 0, - onsafe); - - client_lock.Unlock(); - if (!done /* also !sync */) { - flock.Lock(); - while (! done) - cond.Wait(flock); - flock.Unlock(); - } - - if (r < 0) { - return r; - } else { - return length; - } -} - -int Client::ll_commit_blocks(Inode *in, - uint64_t offset, - uint64_t length) -{ - Mutex::Locker lock(client_lock); - /* - BarrierContext *bctx; - vinodeno_t vino = ll_get_vino(in); - uint64_t ino = vino.ino; - - ldout(cct, 1) << "ll_commit_blocks for " << vino.ino << " from " - << offset << " to " << length << dendl; - - if (length == 0) { - return -EINVAL; - } - - map::iterator p = barriers.find(ino); - if (p != barriers.end()) { - barrier_interval civ(offset, offset + length); - p->second->commit_barrier(civ); - } - */ - return 0; -} - -int Client::ll_write(Fh *fh, loff_t off, loff_t len, const char *data) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "ll_write " << fh << " " << fh->inode->ino << " " << off << - "~" << len << dendl; - tout(cct) << "ll_write" << std::endl; - tout(cct) << (unsigned long)fh << std::endl; - tout(cct) << off << std::endl; - tout(cct) << len << std::endl; - - if (unmounting) - return -ENOTCONN; - - int r = _write(fh, off, len, data, NULL, 0); - ldout(cct, 3) << "ll_write " << fh << " " << off << "~" << len << " = " << r - << dendl; - return r; -} - -int Client::ll_flush(Fh *fh) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "ll_flush " << fh << " " << fh->inode->ino << " " << dendl; - tout(cct) << "ll_flush" << std::endl; - tout(cct) << (unsigned long)fh << std::endl; - - if (unmounting) - return -ENOTCONN; - - return _flush(fh); -} - -int Client::ll_fsync(Fh *fh, bool syncdataonly) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "ll_fsync " << fh << " " << fh->inode->ino << " " << dendl; - tout(cct) << "ll_fsync" << std::endl; - tout(cct) << (unsigned long)fh << std::endl; - - if (unmounting) - return -ENOTCONN; - - int r = _fsync(fh, syncdataonly); - if (r) { - // If we're returning an error, clear it from the FH - fh->take_async_err(); - } - return r; -} - -#ifdef FALLOC_FL_PUNCH_HOLE - -int Client::_fallocate(Fh *fh, int mode, int64_t offset, int64_t length) -{ - if (offset < 0 || length <= 0) - return -EINVAL; - - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) - return -EOPNOTSUPP; - - if ((mode & FALLOC_FL_PUNCH_HOLE) && !(mode & FALLOC_FL_KEEP_SIZE)) - return -EOPNOTSUPP; - - Inode *in = fh->inode.get(); - - if (objecter->osdmap_pool_full(in->layout.pool_id) && - !(mode & FALLOC_FL_PUNCH_HOLE)) { - return -ENOSPC; - } - - if (in->snapid != CEPH_NOSNAP) - return -EROFS; - - if ((fh->mode & CEPH_FILE_MODE_WR) == 0) - return -EBADF; - - uint64_t size = offset + length; - if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)) && - size > in->size && - is_quota_bytes_exceeded(in, size - in->size, fh->actor_perms)) { - return -EDQUOT; - } - - int have; - int r = get_caps(in, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER, &have, -1); - if (r < 0) - return r; - - Mutex uninline_flock("Client::_fallocate_uninline_data flock"); - Cond uninline_cond; - bool uninline_done = false; - int uninline_ret = 0; - Context *onuninline = NULL; - - if (mode & FALLOC_FL_PUNCH_HOLE) { - if (in->inline_version < CEPH_INLINE_NONE && - (have & CEPH_CAP_FILE_BUFFER)) { - bufferlist bl; - int len = in->inline_data.length(); - if (offset < len) { - if (offset > 0) - in->inline_data.copy(0, offset, bl); - int size = length; - if (offset + size > len) - size = len - offset; - if (size > 0) - bl.append_zero(size); - if (offset + size < len) - in->inline_data.copy(offset + size, len - offset - size, bl); - in->inline_data = bl; - in->inline_version++; - } - in->mtime = ceph_clock_now(); - in->change_attr++; - mark_caps_dirty(in, CEPH_CAP_FILE_WR); - } else { - if (in->inline_version < CEPH_INLINE_NONE) { - onuninline = new C_SafeCond(&uninline_flock, - &uninline_cond, - &uninline_done, - &uninline_ret); - uninline_data(in, onuninline); - } - - Mutex flock("Client::_punch_hole flock"); - Cond cond; - bool done = false; - Context *onfinish = new C_SafeCond(&flock, &cond, &done); - - unsafe_sync_write++; - get_cap_ref(in, CEPH_CAP_FILE_BUFFER); - - _invalidate_inode_cache(in, offset, length); - filer->zero(in->ino, &in->layout, - in->snaprealm->get_snap_context(), - offset, length, - ceph::real_clock::now(), - 0, true, onfinish); - in->mtime = ceph_clock_now(); - in->change_attr++; - mark_caps_dirty(in, CEPH_CAP_FILE_WR); - - client_lock.Unlock(); - flock.Lock(); - while (!done) - cond.Wait(flock); - flock.Unlock(); - client_lock.Lock(); - _sync_write_commit(in); - } - } else if (!(mode & FALLOC_FL_KEEP_SIZE)) { - uint64_t size = offset + length; - if (size > in->size) { - in->size = size; - in->mtime = ceph_clock_now(); - in->change_attr++; - mark_caps_dirty(in, CEPH_CAP_FILE_WR); - - if (is_quota_bytes_approaching(in, fh->actor_perms)) { - check_caps(in, CHECK_CAPS_NODELAY); - } else if (is_max_size_approaching(in)) { - check_caps(in, 0); - } - } - } - - if (onuninline) { - client_lock.Unlock(); - uninline_flock.Lock(); - while (!uninline_done) - uninline_cond.Wait(uninline_flock); - uninline_flock.Unlock(); - client_lock.Lock(); - - if (uninline_ret >= 0 || uninline_ret == -ECANCELED) { - in->inline_data.clear(); - in->inline_version = CEPH_INLINE_NONE; - mark_caps_dirty(in, CEPH_CAP_FILE_WR); - check_caps(in, 0); - } else - r = uninline_ret; - } - - put_cap_ref(in, CEPH_CAP_FILE_WR); - return r; -} -#else - -int Client::_fallocate(Fh *fh, int mode, int64_t offset, int64_t length) -{ - return -EOPNOTSUPP; -} - -#endif - - -int Client::ll_fallocate(Fh *fh, int mode, loff_t offset, loff_t length) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "ll_fallocate " << fh << " " << fh->inode->ino << " " << dendl; - tout(cct) << "ll_fallocate " << mode << " " << offset << " " << length << std::endl; - tout(cct) << (unsigned long)fh << std::endl; - - if (unmounting) - return -ENOTCONN; - - return _fallocate(fh, mode, offset, length); -} - -int Client::fallocate(int fd, int mode, loff_t offset, loff_t length) -{ - Mutex::Locker lock(client_lock); - tout(cct) << "fallocate " << " " << fd << mode << " " << offset << " " << length << std::endl; - - if (unmounting) - return -ENOTCONN; - - Fh *fh = get_filehandle(fd); - if (!fh) - return -EBADF; -#if defined(__linux__) && defined(O_PATH) - if (fh->flags & O_PATH) - return -EBADF; -#endif - return _fallocate(fh, mode, offset, length); -} - -int Client::ll_release(Fh *fh) -{ - Mutex::Locker lock(client_lock); - ldout(cct, 3) << "ll_release (fh)" << fh << " " << fh->inode->ino << " " << - dendl; - tout(cct) << "ll_release (fh)" << std::endl; - tout(cct) << (unsigned long)fh << std::endl; - - if (unmounting) - return -ENOTCONN; - - if (ll_unclosed_fh_set.count(fh)) - ll_unclosed_fh_set.erase(fh); - return _release_fh(fh); -} - -int Client::ll_getlk(Fh *fh, struct flock *fl, uint64_t owner) -{ - Mutex::Locker lock(client_lock); - - ldout(cct, 3) << "ll_getlk (fh)" << fh << " " << fh->inode->ino << dendl; - tout(cct) << "ll_getk (fh)" << (unsigned long)fh << std::endl; - - if (unmounting) - return -ENOTCONN; - - return _getlk(fh, fl, owner); -} - -int Client::ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep) -{ - Mutex::Locker lock(client_lock); - - ldout(cct, 3) << "ll_setlk (fh) " << fh << " " << fh->inode->ino << dendl; - tout(cct) << "ll_setk (fh)" << (unsigned long)fh << std::endl; - - if (unmounting) - return -ENOTCONN; - - return _setlk(fh, fl, owner, sleep); -} - -int Client::ll_flock(Fh *fh, int cmd, uint64_t owner) -{ - Mutex::Locker lock(client_lock); - - ldout(cct, 3) << "ll_flock (fh) " << fh << " " << fh->inode->ino << dendl; - tout(cct) << "ll_flock (fh)" << (unsigned long)fh << std::endl; - - if (unmounting) - return -ENOTCONN; - - return _flock(fh, cmd, owner); -} - -class C_Client_RequestInterrupt : public Context { -private: - Client *client; - MetaRequest *req; -public: - C_Client_RequestInterrupt(Client *c, MetaRequest *r) : client(c), req(r) { - req->get(); - } - void finish(int r) override { - Mutex::Locker l(client->client_lock); - assert(req->head.op == CEPH_MDS_OP_SETFILELOCK); - client->_interrupt_filelock(req); - client->put_request(req); - } -}; - -void Client::ll_interrupt(void *d) -{ - MetaRequest *req = static_cast(d); - ldout(cct, 3) << "ll_interrupt tid " << req->get_tid() << dendl; - tout(cct) << "ll_interrupt tid " << req->get_tid() << std::endl; - interrupt_finisher.queue(new C_Client_RequestInterrupt(this, req)); -} - -// ========================================= -// layout - -// expose file layouts - -int Client::describe_layout(const char *relpath, file_layout_t *lp, - const UserPerm& perms) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - filepath path(relpath); - InodeRef in; - int r = path_walk(path, &in, perms); - if (r < 0) - return r; - - *lp = in->layout; - - ldout(cct, 3) << "describe_layout(" << relpath << ") = 0" << dendl; - return 0; -} - -int Client::fdescribe_layout(int fd, file_layout_t *lp) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - Inode *in = f->inode.get(); - - *lp = in->layout; - - ldout(cct, 3) << "fdescribe_layout(" << fd << ") = 0" << dendl; - return 0; -} - -int64_t Client::get_default_pool_id() -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - /* first data pool is the default */ - return mdsmap->get_first_data_pool(); -} - -// expose osdmap - -int64_t Client::get_pool_id(const char *pool_name) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - return objecter->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name), - pool_name); -} - -string Client::get_pool_name(int64_t pool) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return string(); - - return objecter->with_osdmap([pool](const OSDMap& o) { - return o.have_pg_pool(pool) ? o.get_pool_name(pool) : string(); - }); -} - -int Client::get_pool_replication(int64_t pool) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - return objecter->with_osdmap([pool](const OSDMap& o) { - return o.have_pg_pool(pool) ? o.get_pg_pool(pool)->get_size() : -ENOENT; - }); -} - -int Client::get_file_extent_osds(int fd, loff_t off, loff_t *len, vector& osds) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - Inode *in = f->inode.get(); - - vector extents; - Striper::file_to_extents(cct, in->ino, &in->layout, off, 1, in->truncate_size, extents); - assert(extents.size() == 1); - - objecter->with_osdmap([&](const OSDMap& o) { - pg_t pg = o.object_locator_to_pg(extents[0].oid, extents[0].oloc); - o.pg_to_acting_osds(pg, osds); - }); - - if (osds.empty()) - return -EINVAL; - - /* - * Return the remainder of the extent (stripe unit) - * - * If length = 1 is passed to Striper::file_to_extents we get a single - * extent back, but its length is one so we still need to compute the length - * to the end of the stripe unit. - * - * If length = su then we may get 1 or 2 objects back in the extents vector - * which would have to be examined. Even then, the offsets are local to the - * object, so matching up to the file offset is extra work. - * - * It seems simpler to stick with length = 1 and manually compute the - * remainder. - */ - if (len) { - uint64_t su = in->layout.stripe_unit; - *len = su - (off % su); - } - - return 0; -} - -int Client::get_osd_crush_location(int id, vector >& path) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - if (id < 0) - return -EINVAL; - return objecter->with_osdmap([&](const OSDMap& o) { - return o.crush->get_full_location_ordered(id, path); - }); -} - -int Client::get_file_stripe_address(int fd, loff_t offset, - vector& address) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - Inode *in = f->inode.get(); - - // which object? - vector extents; - Striper::file_to_extents(cct, in->ino, &in->layout, offset, 1, - in->truncate_size, extents); - assert(extents.size() == 1); - - // now we have the object and its 'layout' - return objecter->with_osdmap([&](const OSDMap& o) { - pg_t pg = o.object_locator_to_pg(extents[0].oid, extents[0].oloc); - vector osds; - o.pg_to_acting_osds(pg, osds); - if (osds.empty()) - return -EINVAL; - for (unsigned i = 0; i < osds.size(); i++) { - entity_addr_t addr = o.get_addr(osds[i]); - address.push_back(addr); - } - return 0; - }); -} - -int Client::get_osd_addr(int osd, entity_addr_t& addr) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - return objecter->with_osdmap([&](const OSDMap& o) { - if (!o.exists(osd)) - return -ENOENT; - - addr = o.get_addr(osd); - return 0; - }); -} - -int Client::enumerate_layout(int fd, vector& result, - loff_t length, loff_t offset) -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - Fh *f = get_filehandle(fd); - if (!f) - return -EBADF; - Inode *in = f->inode.get(); - - // map to a list of extents - Striper::file_to_extents(cct, in->ino, &in->layout, offset, length, in->truncate_size, result); - - ldout(cct, 3) << "enumerate_layout(" << fd << ", " << length << ", " << offset << ") = 0" << dendl; - return 0; -} - - -/* - * find an osd with the same ip. -1 if none. - */ -int Client::get_local_osd() -{ - Mutex::Locker lock(client_lock); - - if (unmounting) - return -ENOTCONN; - - objecter->with_osdmap([this](const OSDMap& o) { - if (o.get_epoch() != local_osd_epoch) { - local_osd = o.find_osd_on_ip(messenger->get_myaddr()); - local_osd_epoch = o.get_epoch(); - } - }); - return local_osd; -} - - - - - - -// =============================== - -void Client::ms_handle_connect(Connection *con) -{ - ldout(cct, 10) << "ms_handle_connect on " << con->get_peer_addr() << dendl; -} - -bool Client::ms_handle_reset(Connection *con) -{ - ldout(cct, 0) << "ms_handle_reset on " << con->get_peer_addr() << dendl; - return false; -} - -void Client::ms_handle_remote_reset(Connection *con) -{ - ldout(cct, 0) << "ms_handle_remote_reset on " << con->get_peer_addr() << dendl; - Mutex::Locker l(client_lock); - switch (con->get_peer_type()) { - case CEPH_ENTITY_TYPE_MDS: - { - // kludge to figure out which mds this is; fixme with a Connection* state - mds_rank_t mds = MDS_RANK_NONE; - MetaSession *s = NULL; - for (map::iterator p = mds_sessions.begin(); - p != mds_sessions.end(); - ++p) { - if (mdsmap->get_addr(p->first) == con->get_peer_addr()) { - mds = p->first; - s = p->second; - } - } - if (mds >= 0) { - assert (s != NULL); - switch (s->state) { - case MetaSession::STATE_CLOSING: - ldout(cct, 1) << "reset from mds we were closing; we'll call that closed" << dendl; - _closed_mds_session(s); - break; - - case MetaSession::STATE_OPENING: - { - ldout(cct, 1) << "reset from mds we were opening; retrying" << dendl; - list waiters; - waiters.swap(s->waiting_for_open); - _closed_mds_session(s); - MetaSession *news = _get_or_open_mds_session(mds); - news->waiting_for_open.swap(waiters); - } - break; - - case MetaSession::STATE_OPEN: - { - const md_config_t *conf = cct->_conf; - if (conf->client_reconnect_stale) { - ldout(cct, 1) << "reset from mds we were open; close mds session for reconnect" << dendl; - _closed_mds_session(s); - } else { - ldout(cct, 1) << "reset from mds we were open; mark session as stale" << dendl; - s->state = MetaSession::STATE_STALE; - } - } - break; - - case MetaSession::STATE_NEW: - case MetaSession::STATE_CLOSED: - default: - break; - } - } - } - break; - } -} - -bool Client::ms_handle_refused(Connection *con) -{ - ldout(cct, 1) << "ms_handle_refused on " << con->get_peer_addr() << dendl; - return false; -} - -bool Client::ms_get_authorizer(int dest_type, AuthAuthorizer **authorizer, bool force_new) -{ - if (dest_type == CEPH_ENTITY_TYPE_MON) - return true; - *authorizer = monclient->build_authorizer(dest_type); - return true; -} - -Inode *Client::get_quota_root(Inode *in, const UserPerm& perms) -{ - Inode *cur = in; - utime_t now = ceph_clock_now(); - - while (cur) { - if (cur != in && cur->quota.is_enable()) - break; - - Inode *parent_in = NULL; - if (!cur->dn_set.empty()) { - for (auto p = cur->dn_set.begin(); p != cur->dn_set.end(); ++p) { - Dentry *dn = *p; - if (dn->lease_mds >= 0 && - dn->lease_ttl > now && - mds_sessions.count(dn->lease_mds)) { - parent_in = dn->dir->parent_inode; - } else { - Inode *diri = dn->dir->parent_inode; - if (diri->caps_issued_mask(CEPH_CAP_FILE_SHARED) && - diri->shared_gen == dn->cap_shared_gen) { - parent_in = dn->dir->parent_inode; - } - } - if (parent_in) - break; - } - } else if (root_parents.count(cur)) { - parent_in = root_parents[cur].get(); - } - - if (parent_in) { - cur = parent_in; - continue; - } - - if (cur == root_ancestor) - break; - - // deleted inode - if (cur->nlink == 0) { - cur = root_ancestor; - break; - } - - MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPNAME); - filepath path(cur->ino); - req->set_filepath(path); - req->set_inode(cur); - - InodeRef parent_ref; - int ret = make_request(req, perms, &parent_ref); - if (ret < 0) { - ldout(cct, 1) << __func__ << " " << in->vino() - << " failed to find parent of " << cur->vino() - << " err " << ret << dendl; - // FIXME: what to do? - cur = root_ancestor; - break; - } - - now = ceph_clock_now(); - if (cur == in) - cur = parent_ref.get(); - else - cur = in; // start over - } - - ldout(cct, 10) << __func__ << " " << in->vino() << " -> " << cur->vino() << dendl; - return cur; -} - -/** - * Traverse quota ancestors of the Inode, return true - * if any of them passes the passed function - */ -bool Client::check_quota_condition(Inode *in, const UserPerm& perms, - std::function test) -{ - while (true) { - assert(in != NULL); - if (test(*in)) { - return true; - } - - if (in == root_ancestor) { - // We're done traversing, drop out - return false; - } else { - // Continue up the tree - in = get_quota_root(in, perms); - } - } - - return false; -} - -bool Client::is_quota_files_exceeded(Inode *in, const UserPerm& perms) -{ - return check_quota_condition(in, perms, - [](const Inode &in) { - return in.quota.max_files && in.rstat.rsize() >= in.quota.max_files; - }); -} - -bool Client::is_quota_bytes_exceeded(Inode *in, int64_t new_bytes, - const UserPerm& perms) -{ - return check_quota_condition(in, perms, - [&new_bytes](const Inode &in) { - return in.quota.max_bytes && (in.rstat.rbytes + new_bytes) - > in.quota.max_bytes; - }); -} - -bool Client::is_quota_bytes_approaching(Inode *in, const UserPerm& perms) -{ - return check_quota_condition(in, perms, - [](const Inode &in) { - if (in.quota.max_bytes) { - if (in.rstat.rbytes >= in.quota.max_bytes) { - return true; - } - - assert(in.size >= in.reported_size); - const uint64_t space = in.quota.max_bytes - in.rstat.rbytes; - const uint64_t size = in.size - in.reported_size; - return (space >> 4) < size; - } else { - return false; - } - }); -} - -enum { - POOL_CHECKED = 1, - POOL_CHECKING = 2, - POOL_READ = 4, - POOL_WRITE = 8, -}; - -int Client::check_pool_perm(Inode *in, int need) -{ - if (!cct->_conf->client_check_pool_perm) - return 0; - - int64_t pool_id = in->layout.pool_id; - std::string pool_ns = in->layout.pool_ns; - std::pair perm_key(pool_id, pool_ns); - int have = 0; - while (true) { - auto it = pool_perms.find(perm_key); - if (it == pool_perms.end()) - break; - if (it->second == POOL_CHECKING) { - // avoid concurrent checkings - wait_on_list(waiting_for_pool_perm); - } else { - have = it->second; - assert(have & POOL_CHECKED); - break; - } - } - - if (!have) { - if (in->snapid != CEPH_NOSNAP) { - // pool permission check needs to write to the first object. But for snapshot, - // head of the first object may have alread been deleted. To avoid creating - // orphan object, skip the check for now. - return 0; - } - - pool_perms[perm_key] = POOL_CHECKING; - - char oid_buf[32]; - snprintf(oid_buf, sizeof(oid_buf), "%llx.00000000", (unsigned long long)in->ino); - object_t oid = oid_buf; - - SnapContext nullsnapc; - - C_SaferCond rd_cond; - ObjectOperation rd_op; - rd_op.stat(NULL, (ceph::real_time*)nullptr, NULL); - - objecter->mutate(oid, OSDMap::file_to_object_locator(in->layout), rd_op, - nullsnapc, ceph::real_clock::now(), 0, &rd_cond); - - C_SaferCond wr_cond; - ObjectOperation wr_op; - wr_op.create(true); - - objecter->mutate(oid, OSDMap::file_to_object_locator(in->layout), wr_op, - nullsnapc, ceph::real_clock::now(), 0, &wr_cond); - - client_lock.Unlock(); - int rd_ret = rd_cond.wait(); - int wr_ret = wr_cond.wait(); - client_lock.Lock(); - - bool errored = false; - - if (rd_ret == 0 || rd_ret == -ENOENT) - have |= POOL_READ; - else if (rd_ret != -EPERM) { - ldout(cct, 10) << "check_pool_perm on pool " << pool_id << " ns " << pool_ns - << " rd_err = " << rd_ret << " wr_err = " << wr_ret << dendl; - errored = true; - } - - if (wr_ret == 0 || wr_ret == -EEXIST) - have |= POOL_WRITE; - else if (wr_ret != -EPERM) { - ldout(cct, 10) << "check_pool_perm on pool " << pool_id << " ns " << pool_ns - << " rd_err = " << rd_ret << " wr_err = " << wr_ret << dendl; - errored = true; - } - - if (errored) { - // Indeterminate: erase CHECKING state so that subsequent calls re-check. - // Raise EIO because actual error code might be misleading for - // userspace filesystem user. - pool_perms.erase(perm_key); - signal_cond_list(waiting_for_pool_perm); - return -EIO; - } - - pool_perms[perm_key] = have | POOL_CHECKED; - signal_cond_list(waiting_for_pool_perm); - } - - if ((need & CEPH_CAP_FILE_RD) && !(have & POOL_READ)) { - ldout(cct, 10) << "check_pool_perm on pool " << pool_id << " ns " << pool_ns - << " need " << ccap_string(need) << ", but no read perm" << dendl; - return -EPERM; - } - if ((need & CEPH_CAP_FILE_WR) && !(have & POOL_WRITE)) { - ldout(cct, 10) << "check_pool_perm on pool " << pool_id << " ns " << pool_ns - << " need " << ccap_string(need) << ", but no write perm" << dendl; - return -EPERM; - } - - return 0; -} - -int Client::_posix_acl_permission(Inode *in, const UserPerm& perms, unsigned want) -{ - if (acl_type == POSIX_ACL) { - if (in->xattrs.count(ACL_EA_ACCESS)) { - const bufferptr& access_acl = in->xattrs[ACL_EA_ACCESS]; - - return posix_acl_permits(access_acl, in->uid, in->gid, perms, want); - } - } - return -EAGAIN; -} - -int Client::_posix_acl_chmod(Inode *in, mode_t mode, const UserPerm& perms) -{ - if (acl_type == NO_ACL) - return 0; - - int r = _getattr(in, CEPH_STAT_CAP_XATTR, perms, in->xattr_version == 0); - if (r < 0) - goto out; - - if (acl_type == POSIX_ACL) { - if (in->xattrs.count(ACL_EA_ACCESS)) { - const bufferptr& access_acl = in->xattrs[ACL_EA_ACCESS]; - bufferptr acl(access_acl.c_str(), access_acl.length()); - r = posix_acl_access_chmod(acl, mode); - if (r < 0) - goto out; - r = _do_setxattr(in, ACL_EA_ACCESS, acl.c_str(), acl.length(), 0, perms); - } else { - r = 0; - } - } -out: - ldout(cct, 10) << __func__ << " ino " << in->ino << " result=" << r << dendl; - return r; -} - -int Client::_posix_acl_create(Inode *dir, mode_t *mode, bufferlist& xattrs_bl, - const UserPerm& perms) -{ - if (acl_type == NO_ACL) - return 0; - - if (S_ISLNK(*mode)) - return 0; - - int r = _getattr(dir, CEPH_STAT_CAP_XATTR, perms, dir->xattr_version == 0); - if (r < 0) - goto out; - - if (acl_type == POSIX_ACL) { - if (dir->xattrs.count(ACL_EA_DEFAULT)) { - map xattrs; - - const bufferptr& default_acl = dir->xattrs[ACL_EA_DEFAULT]; - bufferptr acl(default_acl.c_str(), default_acl.length()); - r = posix_acl_inherit_mode(acl, mode); - if (r < 0) - goto out; - - if (r > 0) { - r = posix_acl_equiv_mode(acl.c_str(), acl.length(), mode); - if (r < 0) - goto out; - if (r > 0) - xattrs[ACL_EA_ACCESS] = acl; - } - - if (S_ISDIR(*mode)) - xattrs[ACL_EA_DEFAULT] = dir->xattrs[ACL_EA_DEFAULT]; - - r = xattrs.size(); - if (r > 0) - ::encode(xattrs, xattrs_bl); - } else { - if (umask_cb) - *mode &= ~umask_cb(callback_handle); - r = 0; - } - } -out: - ldout(cct, 10) << __func__ << " dir ino " << dir->ino << " result=" << r << dendl; - return r; -} - -void Client::set_filer_flags(int flags) -{ - Mutex::Locker l(client_lock); - assert(flags == 0 || - flags == CEPH_OSD_FLAG_LOCALIZE_READS); - objecter->add_global_op_flags(flags); -} - -void Client::clear_filer_flags(int flags) -{ - Mutex::Locker l(client_lock); - assert(flags == CEPH_OSD_FLAG_LOCALIZE_READS); - objecter->clear_global_op_flag(flags); -} - -/** - * This is included in cap release messages, to cause - * the MDS to wait until this OSD map epoch. It is necessary - * in corner cases where we cancel RADOS ops, so that - * nobody else tries to do IO to the same objects in - * the same epoch as the cancelled ops. - */ -void Client::set_cap_epoch_barrier(epoch_t e) -{ - ldout(cct, 5) << __func__ << " epoch = " << e << dendl; - cap_epoch_barrier = e; -} - -const char** Client::get_tracked_conf_keys() const -{ - static const char* keys[] = { - "client_cache_size", - "client_cache_mid", - "client_acl_type", - NULL - }; - return keys; -} - -void Client::handle_conf_change(const struct md_config_t *conf, - const std::set &changed) -{ - Mutex::Locker lock(client_lock); - - if (changed.count("client_cache_mid")) { - lru.lru_set_midpoint(cct->_conf->client_cache_mid); - } - if (changed.count("client_acl_type")) { - acl_type = NO_ACL; - if (cct->_conf->client_acl_type == "posix_acl") - acl_type = POSIX_ACL; - } -} - -void Client::init_groups(UserPerm *perms) -{ - gid_t *sgids; - int count = _getgrouplist(&sgids, perms->uid(), perms->gid()); - perms->init_gids(sgids, count); -} - -void intrusive_ptr_add_ref(Inode *in) -{ - in->get(); -} - -void intrusive_ptr_release(Inode *in) -{ - in->client->put_inode(in); -} - -mds_rank_t Client::_get_random_up_mds() const -{ - assert(client_lock.is_locked_by_me()); - - std::set up; - mdsmap->get_up_mds_set(up); - - if (up.empty()) - return MDS_RANK_NONE; - std::set::const_iterator p = up.begin(); - for (int n = rand() % up.size(); n; n--) - ++p; - return *p; -} - - -StandaloneClient::StandaloneClient(Messenger *m, MonClient *mc) - : Client(m, mc, new Objecter(m->cct, m, mc, NULL, 0, 0)) -{ - monclient->set_messenger(m); - objecter->set_client_incarnation(0); -} - -StandaloneClient::~StandaloneClient() -{ - delete objecter; - objecter = nullptr; -} - -int StandaloneClient::init() -{ - timer.init(); - objectcacher->start(); - objecter->init(); - - client_lock.Lock(); - assert(!initialized); - - messenger->add_dispatcher_tail(objecter); - messenger->add_dispatcher_tail(this); - - monclient->set_want_keys(CEPH_ENTITY_TYPE_MDS | CEPH_ENTITY_TYPE_OSD); - int r = monclient->init(); - if (r < 0) { - // need to do cleanup because we're in an intermediate init state - timer.shutdown(); - client_lock.Unlock(); - objecter->shutdown(); - objectcacher->stop(); - monclient->shutdown(); - return r; - } - objecter->start(); - - client_lock.Unlock(); - _finish_init(); - - return 0; -} - -void StandaloneClient::shutdown() -{ - Client::shutdown(); - objecter->shutdown(); - monclient->shutdown(); -} -