X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Fmds%2FMDSRank.h;fp=src%2Fceph%2Fsrc%2Fmds%2FMDSRank.h;h=0000000000000000000000000000000000000000;hb=7da45d65be36d36b880cc55c5036e96c24b53f00;hp=7c6df739d90ce8f08eba1b15f321bae8dd8b65e0;hpb=691462d09d0987b47e112d6ee8740375df3c51b2;p=stor4nfv.git diff --git a/src/ceph/src/mds/MDSRank.h b/src/ceph/src/mds/MDSRank.h deleted file mode 100644 index 7c6df73..0000000 --- a/src/ceph/src/mds/MDSRank.h +++ /dev/null @@ -1,583 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2015 Red Hat - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#ifndef MDS_RANK_H_ -#define MDS_RANK_H_ - -#include "common/DecayCounter.h" -#include "common/LogClient.h" -#include "common/Timer.h" -#include "common/TrackedOp.h" - -#include "messages/MCommand.h" - -#include "Beacon.h" -#include "DamageTable.h" -#include "MDSMap.h" -#include "SessionMap.h" -#include "MDCache.h" -#include "Migrator.h" -#include "MDLog.h" -#include "PurgeQueue.h" -#include "osdc/Journaler.h" - -// Full .h import instead of forward declaration for PerfCounter, for the -// benefit of those including this header and using MDSRank::logger -#include "common/perf_counters.h" - -enum { - l_mds_first = 2000, - l_mds_request, - l_mds_reply, - l_mds_reply_latency, - l_mds_forward, - l_mds_dir_fetch, - l_mds_dir_commit, - l_mds_dir_split, - l_mds_dir_merge, - l_mds_inode_max, - l_mds_inodes, - l_mds_inodes_top, - l_mds_inodes_bottom, - l_mds_inodes_pin_tail, - l_mds_inodes_pinned, - l_mds_inodes_expired, - l_mds_inodes_with_caps, - l_mds_caps, - l_mds_subtrees, - l_mds_traverse, - l_mds_traverse_hit, - l_mds_traverse_forward, - l_mds_traverse_discover, - l_mds_traverse_dir_fetch, - l_mds_traverse_remote_ino, - l_mds_traverse_lock, - l_mds_load_cent, - l_mds_dispatch_queue_len, - l_mds_exported, - l_mds_exported_inodes, - l_mds_imported, - l_mds_imported_inodes, - l_mds_last, -}; - -// memory utilization -enum { - l_mdm_first = 2500, - l_mdm_ino, - l_mdm_inoa, - l_mdm_inos, - l_mdm_dir, - l_mdm_dira, - l_mdm_dirs, - l_mdm_dn, - l_mdm_dna, - l_mdm_dns, - l_mdm_cap, - l_mdm_capa, - l_mdm_caps, - l_mdm_rss, - l_mdm_heap, - l_mdm_buf, - l_mdm_last, -}; - -namespace ceph { - struct heartbeat_handle_d; -} - -class Server; -class Locker; -class MDCache; -class MDLog; -class MDBalancer; -class InoTable; -class SnapServer; -class SnapClient; -class MDSTableServer; -class MDSTableClient; -class Messenger; -class Objecter; -class MonClient; -class Finisher; -class MMDSMap; -class ScrubStack; - -/** - * The public part of this class's interface is what's exposed to all - * the various subsystems (server, mdcache, etc), such as pointers - * to the other subsystems, and message-sending calls. - */ -class MDSRank { - protected: - const mds_rank_t whoami; - - // Incarnation as seen in MDSMap at the point where a rank is - // assigned. - int incarnation; - - public: - mds_rank_t get_nodeid() const { return whoami; } - int64_t get_metadata_pool(); - - // Reference to global MDS::mds_lock, so that users of MDSRank don't - // carry around references to the outer MDS, and we can substitute - // a separate lock here in future potentially. - Mutex &mds_lock; - - bool is_daemon_stopping() const; - - // Reference to global cluster log client, just to avoid initialising - // a separate one here. - LogChannelRef &clog; - - // Reference to global timer utility, because MDSRank and MDSDaemon - // currently both use the same mds_lock, so it makes sense for them - // to share a timer. - SafeTimer &timer; - - MDSMap *&mdsmap; - - Objecter *objecter; - - // sub systems - Server *server; - MDCache *mdcache; - Locker *locker; - MDLog *mdlog; - MDBalancer *balancer; - ScrubStack *scrubstack; - DamageTable damage_table; - - - InoTable *inotable; - - SnapServer *snapserver; - SnapClient *snapclient; - - MDSTableClient *get_table_client(int t); - MDSTableServer *get_table_server(int t); - - SessionMap sessionmap; - Session *get_session(client_t client) { - return sessionmap.get_session(entity_name_t::CLIENT(client.v)); - } - - PerfCounters *logger, *mlogger; - OpTracker op_tracker; - - // The last different state I held before current - MDSMap::DaemonState last_state; - // The state assigned to me by the MDSMap - MDSMap::DaemonState state; - - bool cluster_degraded; - - MDSMap::DaemonState get_state() const { return state; } - MDSMap::DaemonState get_want_state() const { return beacon.get_want_state(); } - - bool is_creating() const { return state == MDSMap::STATE_CREATING; } - bool is_starting() const { return state == MDSMap::STATE_STARTING; } - bool is_standby() const { return state == MDSMap::STATE_STANDBY; } - bool is_replay() const { return state == MDSMap::STATE_REPLAY; } - bool is_standby_replay() const { return state == MDSMap::STATE_STANDBY_REPLAY; } - bool is_resolve() const { return state == MDSMap::STATE_RESOLVE; } - bool is_reconnect() const { return state == MDSMap::STATE_RECONNECT; } - bool is_rejoin() const { return state == MDSMap::STATE_REJOIN; } - bool is_clientreplay() const { return state == MDSMap::STATE_CLIENTREPLAY; } - bool is_active() const { return state == MDSMap::STATE_ACTIVE; } - bool is_stopping() const { return state == MDSMap::STATE_STOPPING; } - bool is_any_replay() const { return (is_replay() || is_standby_replay()); } - bool is_stopped() const { return mdsmap->is_stopped(whoami); } - bool is_cluster_degraded() const { return cluster_degraded; } - - void handle_write_error(int err); - - void handle_conf_change(const struct md_config_t *conf, - const std::set &changed) - { - purge_queue.handle_conf_change(conf, changed, *mdsmap); - } - - void update_mlogger(); - protected: - // Flag to indicate we entered shutdown: anyone seeing this to be true - // after taking mds_lock must drop out. - bool stopping; - - // PurgeQueue is only used by StrayManager, but it is owned by MDSRank - // because its init/shutdown happens at the top level. - PurgeQueue purge_queue; - - class ProgressThread : public Thread { - MDSRank *mds; - Cond cond; - public: - explicit ProgressThread(MDSRank *mds_) : mds(mds_) {} - void * entry() override; - void shutdown(); - void signal() {cond.Signal();} - } progress_thread; - - list waiting_for_nolaggy; - list finished_queue; - // Dispatch, retry, queues - int dispatch_depth; - void inc_dispatch_depth() { ++dispatch_depth; } - void dec_dispatch_depth() { --dispatch_depth; } - void retry_dispatch(Message *m); - bool handle_deferrable_message(Message *m); - void _advance_queues(); - bool _dispatch(Message *m, bool new_msg); - - ceph::heartbeat_handle_d *hb; // Heartbeat for threads using mds_lock - - bool is_stale_message(Message *m) const; - - map peer_mdsmap_epoch; - - ceph_tid_t last_tid; // for mds-initiated requests (e.g. stray rename) - - list waiting_for_active, waiting_for_replay, waiting_for_reconnect, waiting_for_resolve; - list replay_queue; - map > waiting_for_active_peer; - map > waiting_for_mdsmap; - - epoch_t osd_epoch_barrier; - - // Const reference to the beacon so that we can behave differently - // when it's laggy. - Beacon &beacon; - - /** - * Emit clog warnings for any ops reported as warnings by optracker - */ - void check_ops_in_flight(); - - int mds_slow_req_count; - - /** - * Share MDSMap with clients - */ - void bcast_mds_map(); // to mounted clients - epoch_t last_client_mdsmap_bcast; - - map export_targets; /* targets this MDS is exporting to or wants/tries to */ - - void create_logger(); - public: - - void queue_waiter(MDSInternalContextBase *c) { - finished_queue.push_back(c); - progress_thread.signal(); - } - void queue_waiters(list& ls) { - finished_queue.splice( finished_queue.end(), ls ); - progress_thread.signal(); - } - - MDSRank( - mds_rank_t whoami_, - Mutex &mds_lock_, - LogChannelRef &clog_, - SafeTimer &timer_, - Beacon &beacon_, - MDSMap *& mdsmap_, - Messenger *msgr, - MonClient *monc_, - Context *respawn_hook_, - Context *suicide_hook_); - - protected: - ~MDSRank(); - - public: - - // Daemon lifetime functions: these guys break the abstraction - // and call up into the parent MDSDaemon instance. It's kind - // of unavoidable: if we want any depth into our calls - // to be able to e.g. tear down the whole process, we have to - // have a reference going all the way down. - // >>> - void suicide(); - void respawn(); - // <<< - - /** - * Call this periodically if inside a potentially long running piece - * of code while holding the mds_lock - */ - void heartbeat_reset(); - - /** - * Report state DAMAGED to the mon, and then pass on to respawn(). Call - * this when an unrecoverable error is encountered while attempting - * to load an MDS rank's data structures. This is *not* for use with - * errors affecting normal dirfrag/inode objects -- they should be handled - * through cleaner scrub/repair mechanisms. - * - * Callers must already hold mds_lock. - */ - void damaged(); - - /** - * Wrapper around `damaged` for users who are not - * already holding mds_lock. - * - * Callers must not already hold mds_lock. - */ - void damaged_unlocked(); - - utime_t get_laggy_until() const; - - void send_message_mds(Message *m, mds_rank_t mds); - void forward_message_mds(Message *req, mds_rank_t mds); - - void send_message_client_counted(Message *m, client_t client); - void send_message_client_counted(Message *m, Session *session); - void send_message_client_counted(Message *m, Connection *connection); - void send_message_client_counted(Message *m, const ConnectionRef& con) { - send_message_client_counted(m, con.get()); - } - void send_message_client(Message *m, Session *session); - void send_message(Message *m, Connection *c); - void send_message(Message *m, const ConnectionRef& c) { - send_message(m, c.get()); - } - - void wait_for_active_peer(mds_rank_t who, MDSInternalContextBase *c) { - waiting_for_active_peer[who].push_back(c); - } - void wait_for_cluster_recovered(MDSInternalContextBase *c) { - assert(cluster_degraded); - waiting_for_active_peer[MDS_RANK_NONE].push_back(c); - } - - void wait_for_active(MDSInternalContextBase *c) { - waiting_for_active.push_back(c); - } - void wait_for_replay(MDSInternalContextBase *c) { - waiting_for_replay.push_back(c); - } - void wait_for_reconnect(MDSInternalContextBase *c) { - waiting_for_reconnect.push_back(c); - } - void wait_for_resolve(MDSInternalContextBase *c) { - waiting_for_resolve.push_back(c); - } - void wait_for_mdsmap(epoch_t e, MDSInternalContextBase *c) { - waiting_for_mdsmap[e].push_back(c); - } - void enqueue_replay(MDSInternalContextBase *c) { - replay_queue.push_back(c); - } - - bool queue_one_replay(); - - void set_osd_epoch_barrier(epoch_t e); - epoch_t get_osd_epoch_barrier() const {return osd_epoch_barrier;} - epoch_t get_osd_epoch() const; - - ceph_tid_t issue_tid() { return ++last_tid; } - - Finisher *finisher; - - MDSMap *get_mds_map() { return mdsmap; } - - int get_req_rate() const { return logger->get(l_mds_request); } - - int get_mds_slow_req_count() const { return mds_slow_req_count; } - - void dump_status(Formatter *f) const; - - void hit_export_target(utime_t now, mds_rank_t rank, double amount=-1.0); - bool is_export_target(mds_rank_t rank) { - const set& map_targets = mdsmap->get_mds_info(get_nodeid()).export_targets; - return map_targets.count(rank); - } - - bool evict_client(int64_t session_id, bool wait, bool blacklist, - std::stringstream& ss, Context *on_killed=nullptr); - - protected: - void dump_clientreplay_status(Formatter *f) const; - void command_scrub_path(Formatter *f, const string& path, vector& scrubop_vec); - void command_tag_path(Formatter *f, const string& path, - const string &tag); - void command_flush_path(Formatter *f, const string& path); - void command_flush_journal(Formatter *f); - void command_get_subtrees(Formatter *f); - void command_export_dir(Formatter *f, - const std::string &path, mds_rank_t dest); - bool command_dirfrag_split( - cmdmap_t cmdmap, - std::ostream &ss); - bool command_dirfrag_merge( - cmdmap_t cmdmap, - std::ostream &ss); - bool command_dirfrag_ls( - cmdmap_t cmdmap, - std::ostream &ss, - Formatter *f); - int _command_export_dir(const std::string &path, mds_rank_t dest); - int _command_flush_journal(std::stringstream *ss); - CDir *_command_dirfrag_get( - const cmdmap_t &cmdmap, - std::ostream &ss); - - protected: - Messenger *messenger; - MonClient *monc; - - Context *respawn_hook; - Context *suicide_hook; - - // Friended to access retry_dispatch - friend class C_MDS_RetryMessage; - - // FIXME the state machine logic should be separable from the dispatch - // logic that calls it. - // >>> - void calc_recovery_set(); - void request_state(MDSMap::DaemonState s); - - bool standby_replaying; // true if current replay pass is in standby-replay mode - - typedef enum { - // The MDSMap is available, configure default layouts and structures - MDS_BOOT_INITIAL = 0, - // We are ready to open some inodes - MDS_BOOT_OPEN_ROOT, - // We are ready to do a replay if needed - MDS_BOOT_PREPARE_LOG, - // Replay is complete - MDS_BOOT_REPLAY_DONE - } BootStep; - friend class C_MDS_BootStart; - friend class C_MDS_InternalBootStart; - void boot_create(); // i am new mds. - void boot_start(BootStep step=MDS_BOOT_INITIAL, int r=0); // starting|replay - - void replay_start(); - void creating_done(); - void starting_done(); - void replay_done(); - void standby_replay_restart(); - void _standby_replay_restart_finish(int r, uint64_t old_read_pos); - class C_MDS_StandbyReplayRestart; - class C_MDS_StandbyReplayRestartFinish; - - void reopen_log(); - - void resolve_start(); - void resolve_done(); - void reconnect_start(); - void reconnect_done(); - void rejoin_joint_start(); - void rejoin_start(); - void rejoin_done(); - void recovery_done(int oldstate); - void clientreplay_start(); - void clientreplay_done(); - void active_start(); - void stopping_start(); - void stopping_done(); - - void validate_sessions(); - // <<< - - // >>> - void handle_mds_recovery(mds_rank_t who); - void handle_mds_failure(mds_rank_t who); - // <<< - - /* Update MDSMap export_targets for this rank. Called on ::tick(). */ - void update_targets(utime_t now); -}; - -/* This expects to be given a reference which it is responsible for. - * The finish function calls functions which - * will put the Message exactly once.*/ -class C_MDS_RetryMessage : public MDSInternalContext { -protected: - Message *m; -public: - C_MDS_RetryMessage(MDSRank *mds, Message *m) - : MDSInternalContext(mds) - { - assert(m); - this->m = m; - } - void finish(int r) override { - mds->retry_dispatch(m); - } -}; - -/** - * The aspect of MDSRank exposed to MDSDaemon but not subsystems: i.e. - * the service/dispatcher stuff like init/shutdown that subsystems should - * never touch. - */ -class MDSRankDispatcher : public MDSRank -{ -public: - void init(); - void tick(); - void shutdown(); - bool handle_asok_command(std::string command, cmdmap_t& cmdmap, - Formatter *f, std::ostream& ss); - void handle_mds_map(MMDSMap *m, MDSMap *oldmap); - void handle_osd_map(); - void update_log_config(); - - bool handle_command( - const cmdmap_t &cmdmap, - MCommand *m, - int *r, - std::stringstream *ds, - std::stringstream *ss, - bool *need_reply); - - void dump_sessions(const SessionFilter &filter, Formatter *f) const; - void evict_clients(const SessionFilter &filter, MCommand *m); - - // Call into me from MDS::ms_dispatch - bool ms_dispatch(Message *m); - - MDSRankDispatcher( - mds_rank_t whoami_, - Mutex &mds_lock_, - LogChannelRef &clog_, - SafeTimer &timer_, - Beacon &beacon_, - MDSMap *& mdsmap_, - Messenger *msgr, - MonClient *monc_, - Context *respawn_hook_, - Context *suicide_hook_); -}; - -// This utility for MDS and MDSRank dispatchers. -#define ALLOW_MESSAGES_FROM(peers) \ -do { \ - if (m->get_connection() && (m->get_connection()->get_peer_type() & (peers)) == 0) { \ - dout(0) << __FILE__ << "." << __LINE__ << ": filtered out request, peer=" << m->get_connection()->get_peer_type() \ - << " allowing=" << #peers << " message=" << *m << dendl; \ - m->put(); \ - return true; \ - } \ -} while (0) - -#endif // MDS_RANK_H_ -