1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2017 Red Hat Ltd
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 #include "OSDMonitor.h"
17 #include "PGMonitor.h"
19 #include "FSCommands.h"
20 #include "MDSMonitor.h"
// Message written to the command's output stream when an experimental
// feature is requested without the "--yes-i-really-mean-it" confirmation.
// Adjacent string literals are concatenated verbatim by the compiler, so
// every fragment except the last carries its own trailing space; without
// it the sentences run together ("experimental.It may ...").
static const std::string EXPERIMENTAL_WARNING(
    "Warning! This feature is experimental. "
    "It may cause problems up to and including data loss. "
    "Consult the documentation at ceph.com, and if unsure, do not proceed. "
    "Add --yes-i-really-mean-it if you are certain.");
// Handler for the "fs flag set" command: sets a flag on the FSMap.
// Reads "flag_name", "val" and "confirm" from the command map; the only
// flag name handled in the code shown here is "enable_multiple", any other
// name produces an "Unknown flag" message.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so return statements, some conditions and braces are not
// visible here.
31 class FlagSetHandler : public FileSystemCommandHandler
35 : FileSystemCommandHandler("fs flag set")
43 map<string, cmd_vartype> &cmdmap,
44 std::stringstream &ss) override
47 cmd_getval(g_ceph_context, cmdmap, "flag_name", flag_name);
50 cmd_getval(g_ceph_context, cmdmap, "val", flag_val);
53 cmd_getval(g_ceph_context, cmdmap, "confirm", confirm);
55 if (flag_name == "enable_multiple") {
56 bool flag_bool = false;
57 int r = parse_bool(flag_val, &flag_bool, ss);
59 ss << "Invalid boolean value '" << flag_val << "'";
// Turning the flag on is refused unless the quorum's connection features
// include CEPH_FEATURE_SERVER_JEWEL.
63 bool jewel = mon->get_quorum_con_features() & CEPH_FEATURE_SERVER_JEWEL;
64 if (flag_bool && !jewel) {
65 ss << "Multiple-filesystems are forbidden until all mons are updated";
// Without the explicit confirmation token the experimental-feature warning
// is written to ss.
68 if (confirm != "--yes-i-really-mean-it") {
69 ss << EXPERIMENTAL_WARNING;
71 fsmap.set_enable_multiple(flag_bool);
74 ss << "Unknown flag '" << flag_name << "'";
// Handler for the "fs new" command: creates a filesystem from an existing
// metadata pool and data pool named in the command map.
// The visible code checks, in order: the metadata pool exists and (unless
// "--force") holds no objects; the data pool exists and does not have id 0;
// the filesystem name is non-empty and not already in use; multiple
// filesystems are enabled if one already exists; neither pool is used by an
// existing filesystem (unless "--allow-dangerous-metadata-overlay"); and
// both pools pass _check_pool().
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so return statements, some conditions and braces are not
// visible here.
80 class FsNewHandler : public FileSystemCommandHandler
83 FsNewHandler(Paxos *paxos)
84 : FileSystemCommandHandler("fs new"), m_paxos(paxos)
88 bool batched_propose() override {
96 map<string, cmd_vartype> &cmdmap,
97 std::stringstream &ss) override
// The handler asserts that paxos is plugged while it runs.
99 assert(m_paxos->is_plugged());
101 string metadata_name;
102 cmd_getval(g_ceph_context, cmdmap, "metadata", metadata_name);
103 int64_t metadata = mon->osdmon()->osdmap.lookup_pg_pool_name(metadata_name);
105 ss << "pool '" << metadata_name << "' does not exist";
110 cmd_getval(g_ceph_context,cmdmap, "force", force_str);
111 bool force = (force_str == "--force");
// The metadata pool's object count is used to reject non-empty pools
// unless --force was given.
112 const pool_stat_t *stat = mon->pgservice->get_pool_stat(metadata);
114 int64_t metadata_num_objects = stat->stats.sum.num_objects;
115 if (!force && metadata_num_objects > 0) {
116 ss << "pool '" << metadata_name
117 << "' already contains some objects. Use an empty pool instead.";
123 cmd_getval(g_ceph_context, cmdmap, "data", data_name);
124 int64_t data = mon->osdmon()->osdmap.lookup_pg_pool_name(data_name);
126 ss << "pool '" << data_name << "' does not exist";
130 ss << "pool '" << data_name << "' has id 0, which CephFS does not allow. Use another pool or recreate it to get a non-zero pool id.";
135 cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name);
136 if (fs_name.empty()) {
137 // Ensure fs name is not empty so that we can implement
138 // commands that refer to FS by name in future.
139 ss << "Filesystem name may not be empty";
143 if (fsmap.get_filesystem(fs_name)) {
144 auto fs = fsmap.get_filesystem(fs_name);
145 if (*(fs->mds_map.get_data_pools().begin()) == data
146 && fs->mds_map.get_metadata_pool() == metadata) {
147 // Identical FS created already, this is a no-op
148 ss << "filesystem '" << fs_name << "' already exists";
151 ss << "filesystem already exists with name '" << fs_name << "'";
156 if (fsmap.filesystem_count() > 0
157 && !fsmap.get_enable_multiple()) {
158 ss << "Creation of multiple filesystems is disabled. To enable "
159 "this experimental feature, use 'ceph fs flag set enable_multiple "
// Refuse to reuse a pool that an existing filesystem already uses unless
// the caller passed the overlay override in "sure".
// NOTE(review): the message below prints fs_name (the filesystem being
// created), not the name of the existing filesystem that owns the pool --
// confirm this is intended.
164 for (auto fs : fsmap.get_filesystems()) {
165 const std::vector<int64_t> &data_pools = fs->mds_map.get_data_pools();
167 if ((std::find(data_pools.begin(), data_pools.end(), data) != data_pools.end()
168 || fs->mds_map.get_metadata_pool() == metadata)
169 && ((!cmd_getval(g_ceph_context, cmdmap, "sure", sure)
170 || sure != "--allow-dangerous-metadata-overlay"))) {
171 ss << "Filesystem '" << fs_name
172 << "' is already using one of the specified RADOS pools. This should ONLY be done in emergencies and after careful reading of the documentation. Pass --allow-dangerous-metadata-overlay to permit this.";
177 pg_pool_t const *data_pool = mon->osdmon()->osdmap.get_pg_pool(data);
178 assert(data_pool != NULL); // Checked it existed above
179 pg_pool_t const *metadata_pool = mon->osdmon()->osdmap.get_pg_pool(metadata);
180 assert(metadata_pool != NULL); // Checked it existed above
// _check_pool is called with metadata=false for the data pool and
// metadata=true for the metadata pool; "force" is forwarded to both calls.
182 int r = _check_pool(mon->osdmon()->osdmap, data, false, force, &ss);
187 r = _check_pool(mon->osdmon()->osdmap, metadata, true, force, &ss);
192 // if we're running as luminous, we have to set the pool application metadata
193 if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS ||
194 mon->osdmon()->pending_inc.new_require_osd_release >= CEPH_RELEASE_LUMINOUS) {
195 if (!mon->osdmon()->is_writeable()) {
196 // not allowed to write yet, so retry when we can
197 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
200 mon->osdmon()->do_application_enable(data,
201 pg_pool_t::APPLICATION_NAME_CEPHFS);
202 mon->osdmon()->do_application_enable(metadata,
203 pg_pool_t::APPLICATION_NAME_CEPHFS);
204 mon->osdmon()->propose_pending();
207 // All checks passed, go ahead and create.
208 fsmap.create_filesystem(fs_name, metadata, data,
209 mon->get_quorum_con_features());
210 ss << "new fs with metadata pool " << metadata << " and data pool " << data;
// Handler for the "fs set" command: changes one setting ("var") of the
// filesystem named by "fs_name" to "val".
// Variables handled in the visible code: max_mds, inline_data, balancer,
// max_file_size, allow_new_snaps, allow_multimds, allow_dirfrags,
// cluster_down and standby_count_wanted; anything else yields
// "unknown variable".  Changes are applied through
// fsmap.modify_filesystem() with a lambda that updates the mds_map.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so return statements, some conditions, the first argument
// of several modify_filesystem() calls and braces are not visible here.
218 class SetHandler : public FileSystemCommandHandler
222 : FileSystemCommandHandler("fs set")
229 map<string, cmd_vartype> &cmdmap,
230 std::stringstream &ss) override
233 if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name) || fs_name.empty()) {
234 ss << "Missing filesystem name";
238 auto fs = fsmap.get_filesystem(fs_name);
240 ss << "Not found: '" << fs_name << "'";
245 if (!cmd_getval(g_ceph_context, cmdmap, "var", var) || var.empty()) {
246 ss << "Invalid variable";
252 if (!cmd_getval(g_ceph_context, cmdmap, "val", val)) {
255 // we got a string. see if it contains an int.
// interr is non-empty when val is not a valid base-10 integer; the
// integer-valued variables below check it before using n.
256 n = strict_strtoll(val.c_str(), 10, &interr);
257 if (var == "max_mds") {
258 // NOTE: see also "mds set_max_mds", which can modify the same field.
259 if (interr.length()) {
265 ss << "You must specify at least one MDS";
269 if (!fs->mds_map.allows_multimds() && n > fs->mds_map.get_max_mds() &&
271 ss << "multi-MDS clusters are not enabled; set 'allow_multimds' to enable";
275 ss << "may not have more than " << MAX_MDS << " MDS ranks";
278 fsmap.modify_filesystem(
280 [n](std::shared_ptr<Filesystem> fs)
282 fs->mds_map.set_max_mds(n);
284 } else if (var == "inline_data") {
285 bool enable_inline = false;
286 int r = parse_bool(val, &enable_inline, ss);
// Enabling inline data needs the "--yes-i-really-mean-it" confirmation and
// also inserts MDS_FEATURE_INCOMPAT_INLINE into the FSMap compat set.
293 if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
294 confirm != "--yes-i-really-mean-it") {
295 ss << EXPERIMENTAL_WARNING;
298 ss << "inline data enabled";
300 fsmap.modify_filesystem(
302 [](std::shared_ptr<Filesystem> fs)
304 fs->mds_map.set_inline_data_enabled(true);
308 CompatSet c = fsmap.get_compat();
309 c.incompat.insert(MDS_FEATURE_INCOMPAT_INLINE);
310 fsmap.update_compat(c);
312 ss << "inline data disabled";
313 fsmap.modify_filesystem(
315 [](std::shared_ptr<Filesystem> fs)
317 fs->mds_map.set_inline_data_enabled(false);
320 } else if (var == "balancer") {
322 ss << "unsetting the metadata load balancer";
324 ss << "setting the metadata load balancer to " << val;
326 fsmap.modify_filesystem(
328 [val](std::shared_ptr<Filesystem> fs)
330 fs->mds_map.set_balancer(val);
333 } else if (var == "max_file_size") {
334 if (interr.length()) {
335 ss << var << " requires an integer value";
// NOTE(review): the message below reads "must at least" (the word "be"
// appears to be missing); it is a runtime string and is left unchanged.
338 if (n < CEPH_MIN_STRIPE_UNIT) {
339 ss << var << " must at least " << CEPH_MIN_STRIPE_UNIT;
342 fsmap.modify_filesystem(
344 [n](std::shared_ptr<Filesystem> fs)
346 fs->mds_map.set_max_filesize(n);
348 } else if (var == "allow_new_snaps") {
349 bool enable_snaps = false;
350 int r = parse_bool(val, &enable_snaps, ss);
356 fsmap.modify_filesystem(
358 [](std::shared_ptr<Filesystem> fs)
360 fs->mds_map.clear_snaps_allowed();
362 ss << "disabled new snapshots";
// Enabling snapshots needs the "--yes-i-really-mean-it" confirmation.
365 if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
366 confirm != "--yes-i-really-mean-it") {
367 ss << EXPERIMENTAL_WARNING;
370 fsmap.modify_filesystem(
372 [](std::shared_ptr<Filesystem> fs)
374 fs->mds_map.set_snaps_allowed();
376 ss << "enabled new snapshots";
378 } else if (var == "allow_multimds") {
379 bool enable_multimds = false;
380 int r = parse_bool(val, &enable_multimds, ss);
385 if (!enable_multimds) {
386 fsmap.modify_filesystem(fs->fscid,
387 [](std::shared_ptr<Filesystem> fs)
389 fs->mds_map.clear_multimds_allowed();
391 ss << "disallowed increasing the cluster size past 1";
393 fsmap.modify_filesystem(
395 [](std::shared_ptr<Filesystem> fs)
397 fs->mds_map.set_multimds_allowed();
399 ss << "enabled creation of more than 1 active MDS";
401 } else if (var == "allow_dirfrags") {
402 bool enable_dirfrags = false;
403 int r = parse_bool(val, &enable_dirfrags, ss);
408 if (!enable_dirfrags) {
409 fsmap.modify_filesystem(fs->fscid,
410 [](std::shared_ptr<Filesystem> fs)
412 fs->mds_map.clear_dirfrags_allowed();
414 ss << "disallowed new directory fragmentation";
416 fsmap.modify_filesystem(
418 [](std::shared_ptr<Filesystem> fs)
420 fs->mds_map.set_dirfrags_allowed();
422 ss << "enabled directory fragmentation";
424 } else if (var == "cluster_down") {
425 bool is_down = false;
426 int r = parse_bool(val, &is_down, ss);
// The CEPH_MDSMAP_DOWN flag on the mds_map is set or cleared by the lambda.
431 fsmap.modify_filesystem(
433 [is_down](std::shared_ptr<Filesystem> fs)
436 fs->mds_map.set_flag(CEPH_MDSMAP_DOWN);
438 fs->mds_map.clear_flag(CEPH_MDSMAP_DOWN);
442 ss << "marked " << (is_down ? "down" : "up");
443 } else if (var == "standby_count_wanted") {
444 if (interr.length()) {
445 ss << var << " requires an integer value";
449 ss << var << " must be non-negative";
452 fsmap.modify_filesystem(
454 [n](std::shared_ptr<Filesystem> fs)
456 fs->mds_map.set_standby_count_wanted(n);
459 ss << "unknown variable " << var;
// Handler for the "fs add_data_pool" command: adds the pool given in
// "pool" to the data pools of the filesystem given in "fs_name".
// The pool is looked up by name in the OSD map; the visible code also
// parses the argument as a base-10 pool id with strict_strtol.  The pool
// must pass _check_pool() (metadata=false, force=false); if it is already a
// data pool of the filesystem only a message is written.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so return statements, some conditions and braces are not
// visible here.
467 class AddDataPoolHandler : public FileSystemCommandHandler
470 AddDataPoolHandler(Paxos *paxos)
471 : FileSystemCommandHandler("fs add_data_pool"), m_paxos(paxos)
474 bool batched_propose() override {
482 map<string, cmd_vartype> &cmdmap,
483 std::stringstream &ss) override
// The handler asserts that paxos is plugged while it runs.
485 assert(m_paxos->is_plugged());
488 cmd_getval(g_ceph_context, cmdmap, "pool", poolname);
491 if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name)
492 || fs_name.empty()) {
493 ss << "Missing filesystem name";
497 auto fs = fsmap.get_filesystem(fs_name);
499 ss << "Not found: '" << fs_name << "'";
503 int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
// presumably a fallback used when the name lookup fails -- the guarding
// condition is not visible in this listing; verify against the full file.
506 poolid = strict_strtol(poolname.c_str(), 10, &err);
508 ss << "pool '" << poolname << "' does not exist";
513 int r = _check_pool(mon->osdmon()->osdmap, poolid, false, false, &ss);
518 // no-op when the data_pool already on fs
519 if (fs->mds_map.is_data_pool(poolid)) {
520 ss << "data pool " << poolid << " is already on fs " << fs_name;
524 // if we're running as luminous, we have to set the pool application metadata
525 if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS ||
526 mon->osdmon()->pending_inc.new_require_osd_release >= CEPH_RELEASE_LUMINOUS) {
527 if (!mon->osdmon()->is_writeable()) {
528 // not allowed to write yet, so retry when we can
529 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
532 mon->osdmon()->do_application_enable(poolid, pg_pool_t::APPLICATION_NAME_CEPHFS);
533 mon->osdmon()->propose_pending();
536 fsmap.modify_filesystem(
538 [poolid](std::shared_ptr<Filesystem> fs)
540 fs->mds_map.add_data_pool(poolid);
543 ss << "added data pool " << poolid << " to fsmap";
// Handler for the "fs set-default" command: looks up the filesystem named
// by "fs_name" and records its fscid via fsmap.set_legacy_client_fscid().
// A "does not exist" message is written for an unknown name.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so return statements and braces are not visible here.
552 class SetDefaultHandler : public FileSystemCommandHandler
556 : FileSystemCommandHandler("fs set-default")
563 map<string, cmd_vartype> &cmdmap,
564 std::stringstream &ss) override
567 cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name);
568 auto fs = fsmap.get_filesystem(fs_name);
570 ss << "filesystem '" << fs_name << "' does not exist";
574 fsmap.set_legacy_client_fscid(fs->fscid);
// Handler for the "fs rm" command: removes the filesystem named by
// "fs_name" from the FSMap.
// The visible code requires that no MDS is up (get_num_up_mds() == 0) and
// that "sure" equals "--yes-i-really-mean-it".  Before erasing, it resets
// the legacy client fscid if it pointed at this filesystem and fails every
// remaining MDS gid listed in the filesystem's mds_info.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so return statements and braces are not visible here.
579 class RemoveFilesystemHandler : public FileSystemCommandHandler
582 RemoveFilesystemHandler()
583 : FileSystemCommandHandler("fs rm")
590 map<string, cmd_vartype> &cmdmap,
591 std::stringstream &ss) override
593 // Check caller has correctly named the FS to delete
594 // (redundant while there is only one FS, but command
595 // syntax should apply to multi-FS future)
597 cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name);
598 auto fs = fsmap.get_filesystem(fs_name);
600 // Consider absence success to make deletes idempotent
601 ss << "filesystem '" << fs_name << "' does not exist";
605 // Check that no MDS daemons are active
606 if (fs->mds_map.get_num_up_mds() > 0) {
607 ss << "all MDS daemons must be inactive before removing filesystem";
611 // Check for confirmation flag
613 cmd_getval(g_ceph_context, cmdmap, "sure", sure);
614 if (sure != "--yes-i-really-mean-it") {
615 ss << "this is a DESTRUCTIVE operation and will make data in your filesystem permanently" \
616 " inaccessible. Add --yes-i-really-mean-it if you are sure you wish to continue.";
// Clear the legacy client fscid if it refers to the filesystem being removed.
620 if (fsmap.get_legacy_client_fscid() == fs->fscid) {
621 fsmap.set_legacy_client_fscid(FS_CLUSTER_ID_NONE);
// The gids are collected first and failed in a second loop.
624 std::vector<mds_gid_t> to_fail;
625 // There may be standby_replay daemons left here
626 for (const auto &i : fs->mds_map.get_mds_info()) {
627 assert(i.second.state == MDSMap::STATE_STANDBY_REPLAY);
628 to_fail.push_back(i.first);
631 for (const auto &gid : to_fail) {
632 // Standby replays don't write, so it isn't important to
633 // wait for an osdmap propose here: ignore return value.
634 mon->mdsmon()->fail_mds_gid(gid);
637 fsmap.erase_filesystem(fs->fscid);
// Handler for the "fs reset" command: calls fsmap.reset_filesystem() for
// the filesystem named by "fs_name".
// The visible code requires that no MDS is up (get_num_up_mds() == 0) and
// that "sure" equals "--yes-i-really-mean-it".
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so return statements and braces are not visible here.
643 class ResetFilesystemHandler : public FileSystemCommandHandler
646 ResetFilesystemHandler()
647 : FileSystemCommandHandler("fs reset")
654 map<string, cmd_vartype> &cmdmap,
655 std::stringstream &ss) override
658 cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name);
659 auto fs = fsmap.get_filesystem(fs_name);
661 ss << "filesystem '" << fs_name << "' does not exist";
662 // Unlike fs rm, we consider this case an error
666 // Check that no MDS daemons are active
667 if (fs->mds_map.get_num_up_mds() > 0) {
668 ss << "all MDS daemons must be inactive before resetting filesystem: set the cluster_down flag"
669 " and use `ceph mds fail` to make this so";
673 // Check for confirmation flag
675 cmd_getval(g_ceph_context, cmdmap, "sure", sure);
676 if (sure != "--yes-i-really-mean-it") {
677 ss << "this is a potentially destructive operation, only for use by experts in disaster recovery. "
678 "Add --yes-i-really-mean-it if you are sure you wish to continue.";
682 fsmap.reset_filesystem(fs->fscid);
// Handler for the "fs rm_data_pool" command: removes the pool given in
// "pool" from the data pools of the filesystem given in "fs_name".
// The pool is looked up by name in the OSD map; the visible code also
// parses the argument as a base-10 pool id with strict_strtol and rejects
// negative ids.  The filesystem's first data pool cannot be removed.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so return statements, some conditions, the declaration of
// r and braces are not visible here.
688 class RemoveDataPoolHandler : public FileSystemCommandHandler
691 RemoveDataPoolHandler()
692 : FileSystemCommandHandler("fs rm_data_pool")
699 map<string, cmd_vartype> &cmdmap,
700 std::stringstream &ss) override
703 cmd_getval(g_ceph_context, cmdmap, "pool", poolname);
706 if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name)
707 || fs_name.empty()) {
708 ss << "Missing filesystem name";
712 auto fs = fsmap.get_filesystem(fs_name);
714 ss << "Not found: '" << fs_name << "'";
718 int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
// presumably a fallback used when the name lookup fails -- the guarding
// condition is not visible in this listing; verify against the full file.
721 poolid = strict_strtol(poolname.c_str(), 10, &err);
723 ss << "pool '" << poolname << "' does not exist";
725 } else if (poolid < 0) {
726 ss << "invalid pool id '" << poolid << "'";
731 assert(poolid >= 0); // Checked by parsing code above
733 if (fs->mds_map.get_first_data_pool() == poolid) {
734 ss << "cannot remove default data pool";
// The lambda stores remove_data_pool()'s result in r (captured by
// reference) so the code after the call can inspect it.
740 fsmap.modify_filesystem(fs->fscid,
741 [&r, poolid](std::shared_ptr<Filesystem> fs)
743 r = fs->mds_map.remove_data_pool(poolid);
746 // It was already removed, succeed in silence
749 // We removed it, succeed
750 ss << "removed data pool " << poolid << " from fsmap";
753 // Unexpected error, bubble up
761 * For commands that refer to a particular filesystem,
762 * enable wrapping to implement the legacy version of
763 * the command (like "mds add_data_pool" vs "fs add_data_pool")
765 * The wrapped handler must expect a fs_name argument in
// Wrapper that exposes handler T under a different command prefix.
// The constructor stores new_prefix and forwards the remaining arguments
// to T's constructor; get_prefix() returns the stored prefix.  The handler
// copies the command map, sets "fs_name" in the copy to the name of
// fsmap.get_legacy_filesystem(), and delegates to T::handle() with that
// copy.  When no legacy filesystem is found, "No filesystem configured" is
// written to ss.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so the template header, return statements and braces are
// not visible here.
769 class LegacyHandler : public T
771 std::string legacy_prefix;
774 template <typename... Args>
775 LegacyHandler(const std::string &new_prefix, Args&&... args)
776 : T(std::forward<Args>(args)...)
778 legacy_prefix = new_prefix;
781 std::string const &get_prefix() override {return legacy_prefix;}
787 map<string, cmd_vartype> &cmdmap,
788 std::stringstream &ss) override
790 auto fs = fsmap.get_legacy_filesystem();
792 ss << "No filesystem configured";
// Work on a copy so the caller's command map is left unmodified.
795 std::map<string, cmd_vartype> modified = cmdmap;
796 modified["fs_name"] = fs->mds_map.get_fs_name();
797 return T::handle(mon, fsmap, op, modified, ss);
802 * For commands with an alternative prefix
// Wrapper that exposes handler T under an alternative command prefix.
// get_prefix() returns the prefix stored by the constructor; the handler
// forwards its arguments to T::handle() unchanged.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so the template header, the base-class initialiser and
// braces are not visible here.
805 class AliasHandler : public T
807 std::string alias_prefix;
810 AliasHandler(const std::string &new_prefix)
813 alias_prefix = new_prefix;
816 std::string const &get_prefix() override {return alias_prefix;}
822 map<string, cmd_vartype> &cmdmap,
823 std::stringstream &ss) override
825 return T::handle(mon, fsmap, op, cmdmap, ss);
// Builds the list of filesystem command handlers.
// Each "fs ..." handler is appended once; SetHandler, AddDataPoolHandler
// and RemoveDataPoolHandler are additionally registered through
// LegacyHandler under their "mds ..." prefixes.  The paxos pointer is
// passed to the handlers whose constructors take it (FsNewHandler and
// AddDataPoolHandler).
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip); the final push_back and the return are cut off here.
830 std::list<std::shared_ptr<FileSystemCommandHandler> >
831 FileSystemCommandHandler::load(Paxos *paxos)
833 std::list<std::shared_ptr<FileSystemCommandHandler> > handlers;
835 handlers.push_back(std::make_shared<SetHandler>());
836 handlers.push_back(std::make_shared<LegacyHandler<SetHandler> >("mds set"));
837 handlers.push_back(std::make_shared<FlagSetHandler>());
838 handlers.push_back(std::make_shared<AddDataPoolHandler>(paxos));
839 handlers.push_back(std::make_shared<LegacyHandler<AddDataPoolHandler> >(
840 "mds add_data_pool", paxos));
841 handlers.push_back(std::make_shared<RemoveDataPoolHandler>());
// Two legacy spellings map to the same removal handler.
842 handlers.push_back(std::make_shared<LegacyHandler<RemoveDataPoolHandler> >(
843 "mds remove_data_pool"));
844 handlers.push_back(std::make_shared<LegacyHandler<RemoveDataPoolHandler> >(
845 "mds rm_data_pool"));
846 handlers.push_back(std::make_shared<FsNewHandler>(paxos));
847 handlers.push_back(std::make_shared<RemoveFilesystemHandler>());
848 handlers.push_back(std::make_shared<ResetFilesystemHandler>());
850 handlers.push_back(std::make_shared<SetDefaultHandler>());
851 handlers.push_back(std::make_shared<AliasHandler<SetDefaultHandler> >(
// Interprets bool_str as a boolean.
// Recognised false spellings: "false", "no", or an integer that parses to
// 0; recognised true spellings: "true", "yes", or an integer that parses
// to 1.  Any other input writes "value must be false|no|0 or true|yes|1"
// to ss.  result must not be null (asserted).
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so the remaining parameters, the assignments to *result
// and the return values are not visible here.
857 int FileSystemCommandHandler::parse_bool(
858 const std::string &bool_str,
862 assert(result != nullptr);
// interr stays empty only when bool_str is a valid base-10 integer.
865 int64_t n = strict_strtoll(bool_str.c_str(), 10, &interr);
867 if (bool_str == "false" || bool_str == "no"
868 || (interr.length() == 0 && n == 0)) {
871 } else if (bool_str == "true" || bool_str == "yes"
872 || (interr.length() == 0 && n == 1)) {
876 ss << "value must be false|no|0 or true|yes|1";
// Checks whether pool_id may be used as a CephFS pool, writing an
// explanatory message to *ss for each rejection visible below:
//  - the pool id is not present in the OSD map;
//  - the pool is erasure-coded and is requested as a metadata pool;
//  - the pool is erasure-coded without overwrite support and lacks tiers, a
//    read tier or a write tier, or its write tier's cache mode is FORWARD
//    or READONLY;
//  - the pool is itself a cache tier;
//  - unless force is set, the pool has application metadata that does not
//    include the CephFS application.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so some parameters, the return values and braces are not
// visible here.
881 int FileSystemCommandHandler::_check_pool(
883 const int64_t pool_id,
886 std::stringstream *ss) const
890 const pg_pool_t *pool = osd_map.get_pg_pool(pool_id);
892 *ss << "pool id '" << pool_id << "' does not exist";
896 const string& pool_name = osd_map.get_pool_name(pool_id);
898 if (pool->is_erasure() && metadata) {
899 *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
900 << " is an erasure-coded pool. Use of erasure-coded pools"
901 << " for CephFS metadata is not permitted";
903 } else if (pool->is_erasure() && !pool->allows_ecoverwrites()) {
904 // non-overwriteable EC pools are only acceptable with a cache tier overlay
905 if (!pool->has_tiers() || !pool->has_read_tier() || !pool->has_write_tier()) {
906 *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
907 << " is an erasure-coded pool, with no overwrite support";
911 // That cache tier overlay must be writeback, not readonly (it's the
912 // write operations like modify+truncate we care about support for)
913 const pg_pool_t *write_tier = osd_map.get_pg_pool(
915 assert(write_tier != NULL); // OSDMonitor shouldn't allow DNE tier
916 if (write_tier->cache_mode == pg_pool_t::CACHEMODE_FORWARD
917 || write_tier->cache_mode == pg_pool_t::CACHEMODE_READONLY) {
918 *ss << "EC pool '" << pool_name << "' has a write tier ("
919 << osd_map.get_pool_name(pool->write_tier)
920 << ") that is configured "
921 "to forward writes. Use a cache mode such as 'writeback' for "
927 if (pool->is_tier()) {
928 *ss << " pool '" << pool_name << "' (id '" << pool_id
929 << "') is already in use as a cache tier.";
// A pool with no application metadata at all passes this check; only a
// non-empty set that lacks the CephFS entry is rejected, and only when
// force is false.
933 if (!force && !pool->application_metadata.empty() &&
934 pool->application_metadata.count(
935 pg_pool_t::APPLICATION_NAME_CEPHFS) == 0) {
936 *ss << " pool '" << pool_name << "' (id '" << pool_id
937 << "') has a non-CephFS application enabled.";
941 // Nothing special about this pool, so it is permissible