+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-/*
- * This is the top level monitor. It runs on each machine in the Monitor
- * Cluster. The election of a leader for the paxos algorithm only happens
- * once per machine via the elector. There is a separate paxos instance (state)
- * kept for each of the system components: Object Store Device (OSD) Monitor,
- * Placement Group (PG) Monitor, Metadata Server (MDS) Monitor, and Client Monitor.
- */
-
-#ifndef CEPH_MONITOR_H
-#define CEPH_MONITOR_H
-
-#include <errno.h>
-#include <cmath>
-
-#include "include/types.h"
-#include "include/health.h"
-#include "msg/Messenger.h"
-
-#include "common/Timer.h"
-
-#include "health_check.h"
-#include "MonMap.h"
-#include "Elector.h"
-#include "Paxos.h"
-#include "Session.h"
-#include "PGStatService.h"
-#include "MonCommand.h"
-
-#include "common/LogClient.h"
-#include "auth/cephx/CephxKeyServer.h"
-#include "auth/AuthMethodList.h"
-#include "auth/KeyRing.h"
-#include "messages/MMonCommand.h"
-#include "mon/MonitorDBStore.h"
-#include "include/memory.h"
-#include "mgr/MgrClient.h"
-
-#include "mon/MonOpRequest.h"
-#include "common/WorkQueue.h"
-
-
-#define CEPH_MON_PROTOCOL 13 /* cluster internal */
-
-
-enum {
- l_cluster_first = 555000,
- l_cluster_num_mon,
- l_cluster_num_mon_quorum,
- l_cluster_num_osd,
- l_cluster_num_osd_up,
- l_cluster_num_osd_in,
- l_cluster_osd_epoch,
- l_cluster_osd_bytes,
- l_cluster_osd_bytes_used,
- l_cluster_osd_bytes_avail,
- l_cluster_num_pool,
- l_cluster_num_pg,
- l_cluster_num_pg_active_clean,
- l_cluster_num_pg_active,
- l_cluster_num_pg_peering,
- l_cluster_num_object,
- l_cluster_num_object_degraded,
- l_cluster_num_object_misplaced,
- l_cluster_num_object_unfound,
- l_cluster_num_bytes,
- l_cluster_num_mds_up,
- l_cluster_num_mds_in,
- l_cluster_num_mds_failed,
- l_cluster_mds_epoch,
- l_cluster_last,
-};
-
-enum {
- l_mon_first = 456000,
- l_mon_num_sessions,
- l_mon_session_add,
- l_mon_session_rm,
- l_mon_session_trim,
- l_mon_num_elections,
- l_mon_election_call,
- l_mon_election_win,
- l_mon_election_lose,
- l_mon_last,
-};
-
-class QuorumService;
-class PaxosService;
-
-class PerfCounters;
-class AdminSocketHook;
-
-class MMonGetMap;
-class MMonGetVersion;
-class MMonMetadata;
-class MMonSync;
-class MMonScrub;
-class MMonProbe;
-struct MMonSubscribe;
-struct MRoute;
-struct MForward;
-struct MTimeCheck;
-struct MMonHealth;
-
-#define COMPAT_SET_LOC "feature_set"
-
-class C_MonContext final : public FunctionContext {
- const Monitor *mon;
-public:
- explicit C_MonContext(Monitor *m, boost::function<void(int)>&& callback)
- : FunctionContext(std::move(callback)), mon(m) {}
- void finish(int r) override;
-};
-
-class Monitor : public Dispatcher,
- public md_config_obs_t {
-public:
- // me
- string name;
- int rank;
- Messenger *messenger;
- ConnectionRef con_self;
- Mutex lock;
- SafeTimer timer;
- Finisher finisher;
- ThreadPool cpu_tp; ///< threadpool for CPU intensive work
-
- /// true if we have ever joined a quorum. if false, we are either a
- /// new cluster, a newly joining monitor, or a just-upgraded
- /// monitor.
- bool has_ever_joined;
-
- PerfCounters *logger, *cluster_logger;
- bool cluster_logger_registered;
-
- void register_cluster_logger();
- void unregister_cluster_logger();
-
- MonMap *monmap;
- uuid_d fingerprint;
-
- set<entity_addr_t> extra_probe_peers;
-
- LogClient log_client;
- LogChannelRef clog;
- LogChannelRef audit_clog;
- KeyRing keyring;
- KeyServer key_server;
-
- AuthMethodList auth_cluster_required;
- AuthMethodList auth_service_required;
-
- CompatSet features;
-
- vector<MonCommand> leader_mon_commands; // quorum leader's commands
- vector<MonCommand> local_mon_commands; // commands i support
- bufferlist local_mon_commands_bl; // encoded version of above
-
- // for upgrading mon cluster that still uses PGMonitor
- vector<MonCommand> local_upgrading_mon_commands; // mixed mon cluster commands
- bufferlist local_upgrading_mon_commands_bl; // encoded version of above
-
- Messenger *mgr_messenger;
- MgrClient mgr_client;
- uint64_t mgr_proxy_bytes = 0; // in-flight proxied mgr command message bytes
-
- const MonPGStatService *pgservice;
-
-private:
- void new_tick();
-
- // -- local storage --
-public:
- MonitorDBStore *store;
- static const string MONITOR_NAME;
- static const string MONITOR_STORE_PREFIX;
-
- // -- monitor state --
-private:
- enum {
- STATE_PROBING = 1,
- STATE_SYNCHRONIZING,
- STATE_ELECTING,
- STATE_LEADER,
- STATE_PEON,
- STATE_SHUTDOWN
- };
- int state;
-
-public:
- static const char *get_state_name(int s) {
- switch (s) {
- case STATE_PROBING: return "probing";
- case STATE_SYNCHRONIZING: return "synchronizing";
- case STATE_ELECTING: return "electing";
- case STATE_LEADER: return "leader";
- case STATE_PEON: return "peon";
- case STATE_SHUTDOWN: return "shutdown";
- default: return "???";
- }
- }
- const char *get_state_name() const {
- return get_state_name(state);
- }
-
- bool is_shutdown() const { return state == STATE_SHUTDOWN; }
- bool is_probing() const { return state == STATE_PROBING; }
- bool is_synchronizing() const { return state == STATE_SYNCHRONIZING; }
- bool is_electing() const { return state == STATE_ELECTING; }
- bool is_leader() const { return state == STATE_LEADER; }
- bool is_peon() const { return state == STATE_PEON; }
-
- const utime_t &get_leader_since() const;
-
- void prepare_new_fingerprint(MonitorDBStore::TransactionRef t);
-
- // -- elector --
-private:
- Paxos *paxos;
- Elector elector;
- friend class Elector;
-
- /// features we require of peers (based on on-disk compatset)
- uint64_t required_features;
-
- int leader; // current leader (to best of knowledge)
- set<int> quorum; // current active set of monitors (if !starting)
- utime_t leader_since; // when this monitor became the leader, if it is the leader
- utime_t exited_quorum; // time detected as not in quorum; 0 if in
-
- // map of counts of connected clients, by type and features, for
- // each quorum mon
- map<int,FeatureMap> quorum_feature_map;
-
- /**
- * Intersection of quorum member's connection feature bits.
- */
- uint64_t quorum_con_features;
- /**
- * Intersection of quorum members mon-specific feature bits
- */
- mon_feature_t quorum_mon_features;
-
- set<string> outside_quorum;
-
- /**
- * @defgroup Monitor_h_scrub
- * @{
- */
- version_t scrub_version; ///< paxos version we are scrubbing
- map<int,ScrubResult> scrub_result; ///< results so far
-
- /**
- * trigger a cross-mon scrub
- *
- * Verify all mons are storing identical content
- */
- int scrub_start();
- int scrub();
- void handle_scrub(MonOpRequestRef op);
- bool _scrub(ScrubResult *r,
- pair<string,string> *start,
- int *num_keys);
- void scrub_check_results();
- void scrub_timeout();
- void scrub_finish();
- void scrub_reset();
- void scrub_update_interval(int secs);
-
- Context *scrub_event; ///< periodic event to trigger scrub (leader)
- Context *scrub_timeout_event; ///< scrub round timeout (leader)
- void scrub_event_start();
- void scrub_event_cancel();
- void scrub_reset_timeout();
- void scrub_cancel_timeout();
-
- struct ScrubState {
- pair<string,string> last_key; ///< last scrubbed key
- bool finished;
-
- ScrubState() : finished(false) { }
- virtual ~ScrubState() { }
- };
- ceph::shared_ptr<ScrubState> scrub_state; ///< keeps track of current scrub
-
- /**
- * @defgroup Monitor_h_sync Synchronization
- * @{
- */
- /**
- * @} // provider state
- */
- struct SyncProvider {
- entity_inst_t entity; ///< who
- uint64_t cookie; ///< unique cookie for this sync attempt
- utime_t timeout; ///< when we give up and expire this attempt
- version_t last_committed; ///< last paxos version on peer
- pair<string,string> last_key; ///< last key sent to (or on) peer
- bool full; ///< full scan?
- MonitorDBStore::Synchronizer synchronizer; ///< iterator
-
- SyncProvider() : cookie(0), last_committed(0), full(false) {}
-
- void reset_timeout(CephContext *cct, int grace) {
- timeout = ceph_clock_now();
- timeout += grace;
- }
- };
-
- map<uint64_t, SyncProvider> sync_providers; ///< cookie -> SyncProvider for those syncing from us
- uint64_t sync_provider_count; ///< counter for issued cookies to keep them unique
-
- /**
- * @} // requester state
- */
- entity_inst_t sync_provider; ///< who we are syncing from
- uint64_t sync_cookie; ///< 0 if we are starting, non-zero otherwise
- bool sync_full; ///< true if we are a full sync, false for recent catch-up
- version_t sync_start_version; ///< last_committed at sync start
- Context *sync_timeout_event; ///< timeout event
-
- /**
- * floor for sync source
- *
- * When we sync we forget about our old last_committed value which
- * can be dangerous. For example, if we have a cluster of:
- *
- * mon.a: lc 100
- * mon.b: lc 80
- * mon.c: lc 100 (us)
- *
- * If something forces us to sync (say, corruption, or manual
- * intervention, or bug), we forget last_committed, and might abort.
- * If mon.a happens to be down when we come back, we will see:
- *
- * mon.b: lc 80
- * mon.c: lc 0 (us)
- *
- * and sync from mon.b, at which point a+b will both have lc 80 and
- * come online with a majority holding out of date commits.
- *
- * Avoid this by preserving our old last_committed value prior to
- * sync and never going backwards.
- */
- version_t sync_last_committed_floor;
-
- /**
- * Obtain the synchronization target prefixes in set form.
- *
- * We consider a target prefix all those that are relevant when
- * synchronizing two stores. That is, all those that hold paxos service's
- * versions, as well as paxos versions, or any control keys such as the
- * first or last committed version.
- *
- * Given the current design, this function should return the name of all and
- * any available paxos service, plus the paxos name.
- *
- * @returns a set of strings referring to the prefixes being synchronized
- */
- set<string> get_sync_targets_names();
-
- /**
- * Reset the monitor's sync-related data structures for syncing *from* a peer
- */
- void sync_reset_requester();
-
- /**
- * Reset sync state related to allowing others to sync from us
- */
- void sync_reset_provider();
-
- /**
- * Caled when a sync attempt times out (requester-side)
- */
- void sync_timeout();
-
- /**
- * Get the latest monmap for backup purposes during sync
- */
- void sync_obtain_latest_monmap(bufferlist &bl);
-
- /**
- * Start sync process
- *
- * Start pulling committed state from another monitor.
- *
- * @param entity where to pull committed state from
- * @param full whether to do a full sync or just catch up on recent paxos
- */
- void sync_start(entity_inst_t &entity, bool full);
-
-public:
- /**
- * force a sync on next mon restart
- */
- void sync_force(Formatter *f, ostream& ss);
-
-private:
- /**
- * store critical state for safekeeping during sync
- *
- * We store a few things on the side that we don't want to get clobbered by sync. This
- * includes the latest monmap and a lower bound on last_committed.
- */
- void sync_stash_critical_state(MonitorDBStore::TransactionRef tx);
-
- /**
- * reset the sync timeout
- *
- * This is used on the client to restart if things aren't progressing
- */
- void sync_reset_timeout();
-
- /**
- * trim stale sync provider state
- *
- * If someone is syncing from us and hasn't talked to us recently, expire their state.
- */
- void sync_trim_providers();
-
- /**
- * Complete a sync
- *
- * Finish up a sync after we've gotten all of the chunks.
- *
- * @param last_committed final last_committed value from provider
- */
- void sync_finish(version_t last_committed);
-
- /**
- * request the next chunk from the provider
- */
- void sync_get_next_chunk();
-
- /**
- * handle sync message
- *
- * @param m Sync message with operation type MMonSync::OP_START_CHUNKS
- */
- void handle_sync(MonOpRequestRef op);
-
- void _sync_reply_no_cookie(MonOpRequestRef op);
-
- void handle_sync_get_cookie(MonOpRequestRef op);
- void handle_sync_get_chunk(MonOpRequestRef op);
- void handle_sync_finish(MonOpRequestRef op);
-
- void handle_sync_cookie(MonOpRequestRef op);
- void handle_sync_forward(MonOpRequestRef op);
- void handle_sync_chunk(MonOpRequestRef op);
- void handle_sync_no_cookie(MonOpRequestRef op);
-
- /**
- * @} // Synchronization
- */
-
- list<Context*> waitfor_quorum;
- list<Context*> maybe_wait_for_quorum;
-
- /**
- * @defgroup Monitor_h_TimeCheck Monitor Clock Drift Early Warning System
- * @{
- *
- * We use time checks to keep track of any clock drifting going on in the
- * cluster. This is accomplished by periodically ping each monitor in the
- * quorum and register its response time on a map, assessing how much its
- * clock has drifted. We also take this opportunity to assess the latency
- * on response.
- *
- * This mechanism works as follows:
- *
- * - Leader sends out a 'PING' message to each other monitor in the quorum.
- * The message is timestamped with the leader's current time. The leader's
- * current time is recorded in a map, associated with each peon's
- * instance.
- * - The peon replies to the leader with a timestamped 'PONG' message.
- * - The leader calculates a delta between the peon's timestamp and its
- * current time and stashes it.
- * - The leader also calculates the time it took to receive the 'PONG'
- * since the 'PING' was sent, and stashes an approximate latency estimate.
- * - Once all the quorum members have pong'ed, the leader will share the
- * clock skew and latency maps with all the monitors in the quorum.
- */
- map<entity_inst_t, utime_t> timecheck_waiting;
- map<entity_inst_t, double> timecheck_skews;
- map<entity_inst_t, double> timecheck_latencies;
- // odd value means we are mid-round; even value means the round has
- // finished.
- version_t timecheck_round;
- unsigned int timecheck_acks;
- utime_t timecheck_round_start;
- friend class HealthMonitor;
- /* When we hit a skew we will start a new round based off of
- * 'mon_timecheck_skew_interval'. Each new round will be backed off
- * until we hit 'mon_timecheck_interval' -- which is the typical
- * interval when not in the presence of a skew.
- *
- * This variable tracks the number of rounds with skews since last clean
- * so that we can report to the user and properly adjust the backoff.
- */
- uint64_t timecheck_rounds_since_clean;
- /**
- * Time Check event.
- */
- Context *timecheck_event;
-
- void timecheck_start();
- void timecheck_finish();
- void timecheck_start_round();
- void timecheck_finish_round(bool success = true);
- void timecheck_cancel_round();
- void timecheck_cleanup();
- void timecheck_reset_event();
- void timecheck_check_skews();
- void timecheck_report();
- void timecheck();
- health_status_t timecheck_status(ostringstream &ss,
- const double skew_bound,
- const double latency);
- void handle_timecheck_leader(MonOpRequestRef op);
- void handle_timecheck_peon(MonOpRequestRef op);
- void handle_timecheck(MonOpRequestRef op);
-
- /**
- * Returns 'true' if this is considered to be a skew; 'false' otherwise.
- */
- bool timecheck_has_skew(const double skew_bound, double *abs) const {
- double abs_skew = std::fabs(skew_bound);
- if (abs)
- *abs = abs_skew;
- return (abs_skew > g_conf->mon_clock_drift_allowed);
- }
-
- /**
- * @}
- */
- /**
- * Handle ping messages from others.
- */
- void handle_ping(MonOpRequestRef op);
-
- Context *probe_timeout_event = nullptr; // for probing
-
- void reset_probe_timeout();
- void cancel_probe_timeout();
- void probe_timeout(int r);
-
- void _apply_compatset_features(CompatSet &new_features);
-
-public:
- epoch_t get_epoch();
- int get_leader() const { return leader; }
- string get_leader_name() {
- return quorum.empty() ? string() : monmap->get_name(*quorum.begin());
- }
- const set<int>& get_quorum() const { return quorum; }
- list<string> get_quorum_names() {
- list<string> q;
- for (set<int>::iterator p = quorum.begin(); p != quorum.end(); ++p)
- q.push_back(monmap->get_name(*p));
- return q;
- }
- uint64_t get_quorum_con_features() const {
- return quorum_con_features;
- }
- mon_feature_t get_quorum_mon_features() const {
- return quorum_mon_features;
- }
- uint64_t get_required_features() const {
- return required_features;
- }
- mon_feature_t get_required_mon_features() const {
- return monmap->get_required_features();
- }
- void apply_quorum_to_compatset_features();
- void apply_monmap_to_compatset_features();
- void calc_quorum_requirements();
-
- void get_combined_feature_map(FeatureMap *fm);
-
-private:
- void _reset(); ///< called from bootstrap, start_, or join_election
- void wait_for_paxos_write();
- void _finish_svc_election(); ///< called by {win,lose}_election
-public:
- void bootstrap();
- void join_election();
- void start_election();
- void win_standalone_election();
- // end election (called by Elector)
- void win_election(epoch_t epoch, set<int>& q,
- uint64_t features,
- const mon_feature_t& mon_features,
- const map<int,Metadata>& metadata);
- void lose_election(epoch_t epoch, set<int>& q, int l,
- uint64_t features,
- const mon_feature_t& mon_features);
- // end election (called by Elector)
- void finish_election();
-
- void update_logger();
-
- /**
- * Vector holding the Services serviced by this Monitor.
- */
- vector<PaxosService*> paxos_service;
-
- class PGMonitor *pgmon() {
- return (class PGMonitor *)paxos_service[PAXOS_PGMAP];
- }
-
- class MDSMonitor *mdsmon() {
- return (class MDSMonitor *)paxos_service[PAXOS_MDSMAP];
- }
-
- class MonmapMonitor *monmon() {
- return (class MonmapMonitor *)paxos_service[PAXOS_MONMAP];
- }
-
- class OSDMonitor *osdmon() {
- return (class OSDMonitor *)paxos_service[PAXOS_OSDMAP];
- }
-
- class AuthMonitor *authmon() {
- return (class AuthMonitor *)paxos_service[PAXOS_AUTH];
- }
-
- class LogMonitor *logmon() {
- return (class LogMonitor*) paxos_service[PAXOS_LOG];
- }
-
- class MgrMonitor *mgrmon() {
- return (class MgrMonitor*) paxos_service[PAXOS_MGR];
- }
-
- class MgrStatMonitor *mgrstatmon() {
- return (class MgrStatMonitor*) paxos_service[PAXOS_MGRSTAT];
- }
-
- class MgrStatMonitor *healthmon() {
- return (class MgrStatMonitor*) paxos_service[PAXOS_MGRSTAT];
- }
-
- friend class Paxos;
- friend class OSDMonitor;
- friend class MDSMonitor;
- friend class MonmapMonitor;
- friend class PGMonitor;
- friend class LogMonitor;
- friend class ConfigKeyService;
-
- QuorumService *health_monitor;
- QuorumService *config_key_service;
-
- // -- sessions --
- MonSessionMap session_map;
- Mutex session_map_lock{"Monitor::session_map_lock"};
- AdminSocketHook *admin_hook;
-
- template<typename Func, typename...Args>
- void with_session_map(Func&& func) {
- Mutex::Locker l(session_map_lock);
- std::forward<Func>(func)(session_map);
- }
- void send_latest_monmap(Connection *con);
-
- // messages
- void handle_get_version(MonOpRequestRef op);
- void handle_subscribe(MonOpRequestRef op);
- void handle_mon_get_map(MonOpRequestRef op);
-
- static void _generate_command_map(map<string,cmd_vartype>& cmdmap,
- map<string,string> ¶m_str_map);
- static const MonCommand *_get_moncommand(
- const string &cmd_prefix,
- const vector<MonCommand>& cmds);
- bool _allowed_command(MonSession *s, string &module, string &prefix,
- const map<string,cmd_vartype>& cmdmap,
- const map<string,string>& param_str_map,
- const MonCommand *this_cmd);
- void get_mon_status(Formatter *f, ostream& ss);
- void _quorum_status(Formatter *f, ostream& ss);
- bool _add_bootstrap_peer_hint(string cmd, cmdmap_t& cmdmap, ostream& ss);
- void handle_command(MonOpRequestRef op);
- void handle_route(MonOpRequestRef op);
-
- void handle_mon_metadata(MonOpRequestRef op);
- int get_mon_metadata(int mon, Formatter *f, ostream& err);
- int print_nodes(Formatter *f, ostream& err);
-
- // Accumulate metadata across calls to update_mon_metadata
- map<int, Metadata> mon_metadata;
- map<int, Metadata> pending_metadata;
-
- /**
- *
- */
- struct health_cache_t {
- health_status_t overall;
- string summary;
-
- void reset() {
- // health_status_t doesn't really have a NONE value and we're not
- // okay with setting something else (say, HEALTH_ERR). so just
- // leave it be.
- summary.clear();
- }
- } health_status_cache;
-
- Context *health_tick_event = nullptr;
- Context *health_interval_event = nullptr;
-
- void health_tick_start();
- void health_tick_stop();
- utime_t health_interval_calc_next_update();
- void health_interval_start();
- void health_interval_stop();
- void health_events_cleanup();
-
- void health_to_clog_update_conf(const std::set<std::string> &changed);
-
- void do_health_to_clog_interval();
- void do_health_to_clog(bool force = false);
-
- /**
- * Generate health report
- *
- * @param status one-line status summary
- * @param detailbl optional bufferlist* to fill with a detailed report
- * @returns health status
- */
- health_status_t get_health(list<string>& status, bufferlist *detailbl,
- Formatter *f);
-
- health_status_t get_health_status(
- bool want_detail,
- Formatter *f,
- std::string *plain,
- const char *sep1 = " ",
- const char *sep2 = "; ");
- void log_health(
- const health_check_map_t& updated,
- const health_check_map_t& previous,
- MonitorDBStore::TransactionRef t);
-
-protected:
-
- class HealthCheckLogStatus {
- public:
- health_status_t severity;
- std::string last_message;
- utime_t updated_at = 0;
- HealthCheckLogStatus(health_status_t severity_,
- const std::string &last_message_,
- utime_t updated_at_)
- : severity(severity_),
- last_message(last_message_),
- updated_at(updated_at_)
- {}
- };
- std::map<std::string, HealthCheckLogStatus> health_check_log_times;
-
-public:
-
- void get_cluster_status(stringstream &ss, Formatter *f);
-
- void reply_command(MonOpRequestRef op, int rc, const string &rs, version_t version);
- void reply_command(MonOpRequestRef op, int rc, const string &rs, bufferlist& rdata, version_t version);
-
-
- void handle_probe(MonOpRequestRef op);
- /**
- * Handle a Probe Operation, replying with our name, quorum and known versions.
- *
- * We use the MMonProbe message class for anything and everything related with
- * Monitor probing. One of the operations relates directly with the probing
- * itself, in which we receive a probe request and to which we reply with
- * our name, our quorum and the known versions for each Paxos service. Thus the
- * redundant function name. This reply will obviously be sent to the one
- * probing/requesting these infos.
- *
- * @todo Add @pre and @post
- *
- * @param m A Probe message, with an operation of type Probe.
- */
- void handle_probe_probe(MonOpRequestRef op);
- void handle_probe_reply(MonOpRequestRef op);
-
- // request routing
- struct RoutedRequest {
- uint64_t tid;
- bufferlist request_bl;
- MonSession *session;
- ConnectionRef con;
- uint64_t con_features;
- entity_inst_t client_inst;
- MonOpRequestRef op;
-
- RoutedRequest() : tid(0), session(NULL), con_features(0) {}
- ~RoutedRequest() {
- if (session)
- session->put();
- }
- };
- uint64_t routed_request_tid;
- map<uint64_t, RoutedRequest*> routed_requests;
-
- void forward_request_leader(MonOpRequestRef op);
- void handle_forward(MonOpRequestRef op);
- void try_send_message(Message *m, const entity_inst_t& to);
- void send_reply(MonOpRequestRef op, Message *reply);
- void no_reply(MonOpRequestRef op);
- void resend_routed_requests();
- void remove_session(MonSession *s);
- void remove_all_sessions();
- void waitlist_or_zap_client(MonOpRequestRef op);
-
- void send_command(const entity_inst_t& inst,
- const vector<string>& com);
-
-public:
- struct C_Command : public C_MonOp {
- Monitor *mon;
- int rc;
- string rs;
- bufferlist rdata;
- version_t version;
- C_Command(Monitor *_mm, MonOpRequestRef _op, int r, string s, version_t v) :
- C_MonOp(_op), mon(_mm), rc(r), rs(s), version(v){}
- C_Command(Monitor *_mm, MonOpRequestRef _op, int r, string s, bufferlist rd, version_t v) :
- C_MonOp(_op), mon(_mm), rc(r), rs(s), rdata(rd), version(v){}
-
- void _finish(int r) override {
- MMonCommand *m = static_cast<MMonCommand*>(op->get_req());
- if (r >= 0) {
- ostringstream ss;
- if (!op->get_req()->get_connection()) {
- ss << "connection dropped for command ";
- } else {
- MonSession *s = op->get_session();
-
- // if client drops we may not have a session to draw information from.
- if (s) {
- ss << "from='" << s->inst << "' "
- << "entity='" << s->entity_name << "' ";
- } else {
- ss << "session dropped for command ";
- }
- }
- ss << "cmd='" << m->cmd << "': finished";
-
- mon->audit_clog->info() << ss.str();
- mon->reply_command(op, rc, rs, rdata, version);
- }
- else if (r == -ECANCELED)
- return;
- else if (r == -EAGAIN)
- mon->dispatch_op(op);
- else
- assert(0 == "bad C_Command return value");
- }
- };
-
- private:
- class C_RetryMessage : public C_MonOp {
- Monitor *mon;
- public:
- C_RetryMessage(Monitor *m, MonOpRequestRef op) :
- C_MonOp(op), mon(m) { }
-
- void _finish(int r) override {
- if (r == -EAGAIN || r >= 0)
- mon->dispatch_op(op);
- else if (r == -ECANCELED)
- return;
- else
- assert(0 == "bad C_RetryMessage return value");
- }
- };
-
- //ms_dispatch handles a lot of logic and we want to reuse it
- //on forwarded messages, so we create a non-locking version for this class
- void _ms_dispatch(Message *m);
- bool ms_dispatch(Message *m) override {
- lock.Lock();
- _ms_dispatch(m);
- lock.Unlock();
- return true;
- }
- void dispatch_op(MonOpRequestRef op);
- //mon_caps is used for un-connected messages from monitors
- MonCap * mon_caps;
- bool ms_get_authorizer(int dest_type, AuthAuthorizer **authorizer, bool force_new) override;
- bool ms_verify_authorizer(Connection *con, int peer_type,
- int protocol, bufferlist& authorizer_data, bufferlist& authorizer_reply,
- bool& isvalid, CryptoKey& session_key) override;
- bool ms_handle_reset(Connection *con) override;
- void ms_handle_remote_reset(Connection *con) override {}
- bool ms_handle_refused(Connection *con) override;
-
- int write_default_keyring(bufferlist& bl);
- void extract_save_mon_key(KeyRing& keyring);
-
- void collect_metadata(Metadata *m);
- void update_mon_metadata(int from, Metadata&& m);
- int load_metadata();
- void count_metadata(const string& field, Formatter *f);
- void count_metadata(const string& field, map<string,int> *out);
-
- // features
- static CompatSet get_initial_supported_features();
- static CompatSet get_supported_features();
- static CompatSet get_legacy_features();
- /// read the ondisk features into the CompatSet pointed to by read_features
- static void read_features_off_disk(MonitorDBStore *store, CompatSet *read_features);
- void read_features();
- void write_features(MonitorDBStore::TransactionRef t);
-
- OpTracker op_tracker;
-
- public:
- Monitor(CephContext *cct_, string nm, MonitorDBStore *s,
- Messenger *m, Messenger *mgr_m, MonMap *map);
- ~Monitor() override;
-
- static int check_features(MonitorDBStore *store);
-
- // config observer
- const char** get_tracked_conf_keys() const override;
- void handle_conf_change(const struct md_config_t *conf,
- const std::set<std::string> &changed) override;
-
- void update_log_clients();
- int sanitize_options();
- int preinit();
- int init();
- void init_paxos();
- void refresh_from_paxos(bool *need_bootstrap);
- void shutdown();
- void tick();
-
- void handle_signal(int sig);
-
- int mkfs(bufferlist& osdmapbl);
-
- /**
- * check cluster_fsid file
- *
- * @return EEXIST if file exists and doesn't match, 0 on match, or negative error code
- */
- int check_fsid();
-
- /**
- * write cluster_fsid file
- *
- * @return 0 on success, or negative error code
- */
- int write_fsid();
- int write_fsid(MonitorDBStore::TransactionRef t);
-
- void do_admin_command(std::string command, cmdmap_t& cmdmap,
- std::string format, ostream& ss);
-
-private:
- // don't allow copying
- Monitor(const Monitor& rhs);
- Monitor& operator=(const Monitor &rhs);
-
-public:
- static void format_command_descriptions(const std::vector<MonCommand> &commands,
- Formatter *f,
- bufferlist *rdata,
- bool hide_mgr_flag=false);
-
- const std::vector<MonCommand> &get_local_commands(mon_feature_t f) {
- if (f.contains_all(ceph::features::mon::FEATURE_LUMINOUS))
- return local_mon_commands;
- else
- return local_upgrading_mon_commands;
- }
- const bufferlist& get_local_commands_bl(mon_feature_t f) {
- if (f.contains_all(ceph::features::mon::FEATURE_LUMINOUS))
- return local_mon_commands_bl;
- else
- return local_upgrading_mon_commands_bl;
- }
- void set_leader_commands(const std::vector<MonCommand>& cmds) {
- leader_mon_commands = cmds;
- }
-
- static bool is_keyring_required();
-};
-
-#define CEPH_MON_FEATURE_INCOMPAT_BASE CompatSet::Feature (1, "initial feature set (~v.18)")
-#define CEPH_MON_FEATURE_INCOMPAT_GV CompatSet::Feature (2, "global version sequencing (v0.52)")
-#define CEPH_MON_FEATURE_INCOMPAT_SINGLE_PAXOS CompatSet::Feature (3, "single paxos with k/v store (v0.\?)")
-#define CEPH_MON_FEATURE_INCOMPAT_OSD_ERASURE_CODES CompatSet::Feature(4, "support erasure code pools")
-#define CEPH_MON_FEATURE_INCOMPAT_OSDMAP_ENC CompatSet::Feature(5, "new-style osdmap encoding")
-#define CEPH_MON_FEATURE_INCOMPAT_ERASURE_CODE_PLUGINS_V2 CompatSet::Feature(6, "support isa/lrc erasure code")
-#define CEPH_MON_FEATURE_INCOMPAT_ERASURE_CODE_PLUGINS_V3 CompatSet::Feature(7, "support shec erasure code")
-#define CEPH_MON_FEATURE_INCOMPAT_KRAKEN CompatSet::Feature(8, "support monmap features")
-#define CEPH_MON_FEATURE_INCOMPAT_LUMINOUS CompatSet::Feature(9, "luminous ondisk layout")
-// make sure you add your feature to Monitor::get_supported_features
-
-
-
-#endif