1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2012 Inktank, Inc.
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
13 #include <boost/program_options/variables_map.hpp>
14 #include <boost/program_options/parsers.hpp>
15 #include <boost/scope_exit.hpp>
20 #include "common/Formatter.h"
21 #include "common/errno.h"
23 #include "auth/KeyRing.h"
24 #include "auth/cephx/CephxKeyServer.h"
25 #include "global/global_init.h"
26 #include "include/stringify.h"
27 #include "mgr/mgr_commands.h"
28 #include "mon/AuthMonitor.h"
29 #include "mon/MonitorDBStore.h"
30 #include "mon/Paxos.h"
31 #include "mon/MonMap.h"
32 #include "mds/MDSMap.h"
33 #include "osd/OSDMap.h"
34 #include "crush/CrushCompiler.h"
36 namespace po = boost::program_options;
42 MonitorDBStore::TransactionRef t;
44 explicit TraceIter(string fname) : fd(-1), idx(-1) {
45 fd = ::open(fname.c_str(), O_RDONLY);
46 t.reset(new MonitorDBStore::Transaction);
51 MonitorDBStore::TransactionRef cur() {
55 unsigned num() { return idx; }
59 int r = bl.read_fd(fd, 6);
61 std::cerr << "Got error: " << cpp_strerror(r) << " on read_fd"
66 } else if ((unsigned)r < 6) {
67 std::cerr << "short read" << std::endl;
72 bufferlist::iterator bliter = bl.begin();
74 ::decode(ver, bliter);
75 ::decode(ver2, bliter);
77 ::decode(len, bliter);
78 r = bl.read_fd(fd, len);
80 std::cerr << "Got error: " << cpp_strerror(r) << " on read_fd"
85 } else if ((unsigned)r < len) {
86 std::cerr << "short read" << std::endl;
92 t.reset(new MonitorDBStore::Transaction);
108 po::options_description *desc, /// < visible options description
109 po::options_description *hidden_desc, /// < hidden options description
110 po::positional_options_description *positional, /// < positional args
111 vector<string> &cmd_args, /// < arguments to be parsed
112 po::variables_map *vm /// > post-parsing variable map
115 // desc_all will aggregate all visible and hidden options for parsing.
117 // From boost's program_options point of view, there is absolutely no
118 // distinction between 'desc' and 'hidden_desc'. This is a distinction
119 // that is only useful to us: 'desc' is whatever we are willing to show
120 // on 'usage()', whereas 'hidden_desc' refers to parameters we wish to
121 // take advantage of but do not wish to show on 'usage()'.
123 // For example, consider that program_options matches positional arguments
124 // (specified via 'positional') against the parameters defined on a
125 // given 'po::options_description' class. This is performed below,
126 // supplying both the description and the positional arguments to the
127 // parser. However, we do not want the parameters that are mapped to
128 // positional arguments to be shown on usage, as that makes for ugly and
129 // confusing usage messages. Therefore we dissociate the options'
130 // description that is to be used as an aid to the user from those options
131 // that are nothing but useful for internal purposes (i.e., mapping options
132 // to positional arguments). We still need to aggregate them before parsing
133 // and that's what 'desc_all' is all about.
136 assert(desc != NULL);
138 po::options_description desc_all;
140 if (hidden_desc != NULL)
141 desc_all.add(*hidden_desc);
144 po::command_line_parser parser = po::command_line_parser(cmd_args).
148 parser = parser.positional(*positional);
151 po::parsed_options parsed = parser.run();
152 po::store(parsed, *vm);
154 } catch (po::error &e) {
155 std::cerr << "error: " << e.what() << std::endl;
163 * usage: ceph-monstore-tool <store-path> <command> [options]
167 * store-copy < --out arg >
170 * getmonmap < --out arg [ --version arg ] >
171 * getosdmap < --out arg [ --version arg ] >
172 * dump-paxos <--dump-start VER> <--dump-end VER>
173 * dump-trace < --trace-file arg >
181 * ceph-monstore-tool PATH CMD [options]
183 * ceph-monstore-tool PATH store-copy <PATH2 | -o PATH2>
184 * ceph-monstore-tool PATH dump-keys
185 * ceph-monstore-tool PATH compact
186 * ceph-monstore-tool PATH get monmap [VER]
187 * ceph-monstore-tool PATH get osdmap [VER]
188 * ceph-monstore-tool PATH dump-paxos STARTVER ENDVER
192 void usage(const char *n, po::options_description &d)
195 "usage: " << n << " <store-path> <cmd> [args|options]\n"
198 << " store-copy PATH copies store to PATH\n"
199 << " compact compacts the store\n"
200 << " get monmap [-- options] get monmap (version VER if specified)\n"
201 << " (default: last committed)\n"
202 << " get osdmap [-- options] get osdmap (version VER if specified)\n"
203 << " (default: last committed)\n"
204 << " get mdsmap [-- options] get mdsmap (version VER if specified)\n"
205 << " (default: last committed)\n"
206 << " get crushmap [-- options] get crushmap (version VER if specified)\n"
207 << " (default: last committed)\n"
208 << " show-versions [-- options] show the first&last committed version of map\n"
209 << " (show-versions -- --help for more info)\n"
210 << " dump-keys dumps store keys to FILE\n"
211 << " (default: stdout)\n"
212 << " dump-paxos [-- options] dump paxos transactions\n"
213 << " (dump-paxos -- --help for more info)\n"
214 << " dump-trace FILE [-- options] dump contents of trace file FILE\n"
215 << " (dump-trace -- --help for more info)\n"
216 << " replay-trace FILE [-- options] replay trace from FILE\n"
217 << " (replay-trace -- --help for more info)\n"
218 << " random-gen [-- options] add randomly generated ops to the store\n"
219 << " (random-gen -- --help for more info)\n"
220 << " rewrite-crush [-- options] add a rewrite commit to the store\n"
221 << " (rewrite-crush -- --help for more info)\n"
222 << " inflate-pgmap [-- options] add given number of pgmaps to store\n"
223 << " (inflate-pgmap -- --help for more info)\n"
224 << " rebuild rebuild store\n"
225 << " (rebuild -- --help for more info)\n"
227 std::cerr << d << std::endl;
229 << "\nPlease Note:\n"
230 << "* Ceph-specific options should be in the format --option-name=VAL\n"
231 << " (specifically, do not forget the '='!!)\n"
232 << "* Command-specific options need to be passed after a '--'\n"
233 << " e.g., 'get monmap -- --version 10 --out /tmp/foo'"
237 int update_osdmap(MonitorDBStore& store, version_t ver, bool copy,
238 ceph::shared_ptr<CrushWrapper> crush,
239 MonitorDBStore::Transaction* t) {
240 const string prefix("osdmap");
245 r = store.get(prefix, store.combine_strings("full", ver), bl);
247 std::cerr << "Error getting full map: " << cpp_strerror(r) << std::endl;
252 osdmap.crush = crush;
257 // be consistent with OSDMonitor::update_from_paxos()
258 osdmap.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED);
259 t->put(prefix, store.combine_strings("full", osdmap.get_epoch()), bl);
262 OSDMap::Incremental inc;
264 inc.epoch = osdmap.get_epoch();
265 inc.fsid = osdmap.get_fsid();
268 r = store.get(prefix, ver, bl);
270 std::cerr << "Error getting inc map: " << cpp_strerror(r) << std::endl;
273 OSDMap::Incremental inc(bl);
274 if (inc.crush.length()) {
276 crush->encode(inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
278 if (inc.fullmap.length()) {
280 fullmap.decode(inc.fullmap);
281 fullmap.crush = crush;
283 fullmap.encode(inc.fullmap);
286 assert(osdmap.have_crc());
287 inc.full_crc = osdmap.get_crc();
289 // be consistent with OSDMonitor::update_from_paxos()
290 inc.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED);
291 t->put(prefix, inc.epoch, bl);
295 int rewrite_transaction(MonitorDBStore& store, int version,
296 const string& crush_file,
297 MonitorDBStore::Transaction* t) {
298 const string prefix("osdmap");
300 // calc the known-good epoch
301 version_t last_committed = store.get(prefix, "last_committed");
302 version_t good_version = 0;
304 if (last_committed >= (unsigned)-version) {
305 good_version = last_committed + version;
307 std::cerr << "osdmap-version is less than: -" << last_committed << std::endl;
311 good_version = version;
313 if (good_version >= last_committed) {
314 std::cout << "good epoch is greater or equal to the last committed one: "
315 << good_version << " >= " << last_committed << std::endl;
319 // load/extract the crush map
321 ceph::shared_ptr<CrushWrapper> crush(new CrushWrapper);
322 if (crush_file.empty()) {
324 r = store.get(prefix, store.combine_strings("full", good_version), bl);
326 std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl;
331 crush = osdmap.crush;
335 r = bl.read_file(crush_file.c_str(), &err);
337 std::cerr << err << ": " << cpp_strerror(r) << std::endl;
340 bufferlist::iterator p = bl.begin();
344 // prepare a transaction to rewrite the epochs
345 // (good_version, last_committed]
346 // with the good crush map.
347 // XXX: may need to break this into several paxos versions?
348 assert(good_version < last_committed);
349 for (version_t v = good_version + 1; v <= last_committed; v++) {
350 cout << "rewriting epoch #" << v << "/" << last_committed << std::endl;
351 r = update_osdmap(store, v, false, crush, t);
356 // add a new osdmap epoch to store, so monitors will update their current osdmap
357 // in addition to the ones stored in epochs.
359 // This is needed due to the way the monitor updates from paxos and the
360 // facilities we are leveraging to push this update to the rest of the
363 // In a nutshell, we are generating a good version of the osdmap, with a
364 // proper crush, and building a transaction that will replace the bad
365 // osdmaps with good osdmaps. But this transaction needs to be applied on
366 // all nodes, so that the monitors will have good osdmaps to share with
367 // clients. We thus leverage Paxos, specifically the recovery mechanism, by
368 // creating a pending value that will be committed once the monitors form an
369 // initial quorum after being brought back to life.
371 // However, the way the monitor works has the paxos services, including the
372 // OSDMonitor, updating their state from disk *before* the recovery phase
373 // begins (so they have an up to date state in memory). This means the
374 // OSDMonitor will see the old, broken map, before the new paxos version is
375 // applied to disk, and the old version is cached. Even though we have the
376 // good map now, and we share the good map with clients, we will still be
377 // working on the old broken map. Instead of mucking around the monitor to
378 // make this work, we instead opt for adding the same osdmap but with a
379 // newer version, so that the OSDMonitor picks up on it when it updates from
380 // paxos after the proposal has been committed. This is not elegant, but
381 // avoids further unpleasantness that would arise from kludging around the
382 // current behavior. It also has the added benefit of making sure the clients
383 // get an updated version of the map (because last_committed+1 >
384 // last_committed) :)
386 cout << "adding a new epoch #" << last_committed+1 << std::endl;
387 r = update_osdmap(store, last_committed++, true, crush, t);
390 t->put(prefix, store.combine_strings("full", "latest"), last_committed);
391 t->put(prefix, "last_committed", last_committed);
396 * create a new paxos version which carries a proposal to rewrite all epochs
397 * of incremental and full map of "osdmap" after a faulty crush map is injected.
398 * so the leader will trigger a recovery and propagate this fix to its peons,
399 * after the proposal is accepted, and the transaction in it is applied. all
400 * monitors will rewrite the bad crush map with the good one, and have a new
401 * osdmap epoch with the good crush map in it.
403 int rewrite_crush(const char* progname,
404 vector<string>& subcmds,
405 MonitorDBStore& store) {
406 po::options_description op_desc("Allowed 'rewrite-crush' options");
409 op_desc.add_options()
410 ("help,h", "produce this help message")
411 ("crush", po::value<string>(&crush_file),
412 ("path to the crush map file "
413 "(default: will instead extract it from the known-good osdmap)"))
414 ("good-epoch", po::value<int>(&version),
415 "known-good epoch of osdmap, if a negative number '-N' is given, the "
416 "$last_committed-N is used instead (default: -1). "
417 "Please note, -1 is not necessarily a good epoch, because there are "
418 "good chance that we have more epochs slipped into the monstore after "
419 "the one where the crushmap is firstly injected.")
421 po::variables_map op_vm;
422 int r = parse_cmd_args(&op_desc, NULL, NULL, subcmds, &op_vm);
426 if (op_vm.count("help")) {
427 usage(progname, op_desc);
431 MonitorDBStore::Transaction rewrite_txn;
432 r = rewrite_transaction(store, version, crush_file, &rewrite_txn);
437 // store the transaction into store as a proposal
438 const string prefix("paxos");
439 version_t pending_v = store.get(prefix, "last_committed") + 1;
440 auto t(std::make_shared<MonitorDBStore::Transaction>());
442 rewrite_txn.encode(bl);
443 cout << "adding pending commit " << pending_v
444 << " " << bl.length() << " bytes" << std::endl;
445 t->put(prefix, pending_v, bl);
446 t->put(prefix, "pending_v", pending_v);
447 // a large enough yet unique proposal number will probably do the trick
448 version_t pending_pn = (store.get(prefix, "accepted_pn") / 100 + 4) * 100 + 1;
449 t->put(prefix, "pending_pn", pending_pn);
450 store.apply_transaction(t);
454 int inflate_pgmap(MonitorDBStore& st, unsigned n, bool can_be_trimmed) {
455 // put latest pg map into monstore to bloat it up
456 // only format version == 1 is supported
457 version_t last = st.get("pgmap", "last_committed");
460 // get the latest delta
461 int r = st.get("pgmap", last, bl);
463 std::cerr << "Error getting pgmap: " << cpp_strerror(r) << std::endl;
467 // try to pull together an idempotent "delta"
468 ceph::unordered_map<pg_t, pg_stat_t> pg_stat;
469 for (KeyValueDB::Iterator i = st.get_iterator("pgmap_pg");
470 i->valid(); i->next()) {
472 if (!pgid.parse(i->key().c_str())) {
473 std::cerr << "unable to parse key " << i->key() << std::endl;
476 bufferlist pg_bl = i->value();
478 bufferlist::iterator p = pg_bl.begin();
480 // will update the last_epoch_clean of all the pgs.
484 version_t first = st.get("pgmap", "first_committed");
485 version_t ver = last;
486 auto txn(std::make_shared<MonitorDBStore::Transaction>());
487 for (unsigned i = 0; i < n; i++) {
489 bufferlist dirty_pgs;
490 for (ceph::unordered_map<pg_t, pg_stat_t>::iterator ps = pg_stat.begin();
491 ps != pg_stat.end(); ++ps) {
492 ::encode(ps->first, dirty_pgs);
493 if (!can_be_trimmed) {
494 ps->second.last_epoch_clean = first;
496 ::encode(ps->second, dirty_pgs);
498 utime_t inc_stamp = ceph_clock_now();
499 ::encode(inc_stamp, trans_bl);
500 ::encode_destructively(dirty_pgs, trans_bl);
501 bufferlist dirty_osds;
502 ::encode(dirty_osds, trans_bl);
503 txn->put("pgmap", ++ver, trans_bl);
504 // update the db in batch
505 if (txn->size() > 1024) {
506 st.apply_transaction(txn);
507 // reset the transaction
508 txn.reset(new MonitorDBStore::Transaction);
511 txn->put("pgmap", "last_committed", ver);
512 txn->put("pgmap_meta", "version", ver);
513 // this will also piggy back the leftover pgmap added in the loop above
514 st.apply_transaction(txn);
518 static int update_auth(MonitorDBStore& st, const string& keyring_path)
520 // import all keyrings stored in the keyring file
522 int r = keyring.load(g_ceph_context, keyring_path);
524 cerr << "unable to load admin keyring: " << keyring_path << std::endl;
532 for (const auto& k : keyring.get_keys()) {
533 KeyServerData::Incremental auth_inc;
534 auth_inc.name = k.first;
535 auth_inc.auth = k.second;
536 if (auth_inc.auth.caps.empty()) {
537 cerr << "no caps granted to: " << auth_inc.name << std::endl;
540 auth_inc.op = KeyServerData::AUTH_INC_ADD;
542 AuthMonitor::Incremental inc;
543 inc.inc_type = AuthMonitor::AUTH_DATA;
544 ::encode(auth_inc, inc.auth_data);
545 inc.auth_type = CEPH_AUTH_CEPHX;
547 inc.encode(bl, CEPH_FEATURES_ALL);
550 const string prefix("auth");
551 auto last_committed = st.get(prefix, "last_committed") + 1;
552 auto t = make_shared<MonitorDBStore::Transaction>();
553 t->put(prefix, last_committed, bl);
554 t->put(prefix, "last_committed", last_committed);
555 auto first_committed = st.get(prefix, "first_committed");
556 if (!first_committed) {
557 t->put(prefix, "first_committed", last_committed);
559 st.apply_transaction(t);
563 static int update_mkfs(MonitorDBStore& st)
566 int r = monmap.build_initial(g_ceph_context, cerr);
568 cerr << "no initial monitors" << std::endl;
572 monmap.encode(bl, CEPH_FEATURES_ALL);
574 auto t = make_shared<MonitorDBStore::Transaction>();
575 t->put("mkfs", "monmap", bl);
576 st.apply_transaction(t);
580 static int update_monitor(MonitorDBStore& st)
582 const string prefix("monitor");
583 // a stripped-down Monitor::mkfs()
585 bl.append(CEPH_MON_ONDISK_MAGIC "\n");
586 auto t = make_shared<MonitorDBStore::Transaction>();
587 t->put(prefix, "magic", bl);
588 st.apply_transaction(t);
592 static int update_mgrmap(MonitorDBStore& st)
594 auto t = make_shared<MonitorDBStore::Transaction>();
598 // mgr expects epoch > 1
600 auto initial_modules =
601 get_str_vec(g_ceph_context->_conf->get_val<string>("mgr_initial_modules"));
602 copy(begin(initial_modules),
603 end(initial_modules),
604 inserter(map.modules, end(map.modules)));
606 map.encode(bl, CEPH_FEATURES_ALL);
607 t->put("mgr", map.epoch, bl);
608 t->put("mgr", "last_committed", map.epoch);
611 auto mgr_command_descs = mgr_commands;
612 for (auto& c : mgr_command_descs) {
613 c.set_flag(MonCommand::FLAG_MGR);
616 ::encode(mgr_command_descs, bl);
617 t->put("mgr_command_desc", "", bl);
619 return st.apply_transaction(t);
622 static int update_paxos(MonitorDBStore& st)
624 // build a pending paxos proposal from all non-permanent k/v pairs. once the
625 // proposal is committed, it will get applied. on the sync provider side, it
626 // will be a no-op, but on its peers, the paxos commit will help to build up
627 // the necessary epochs.
628 bufferlist pending_proposal;
630 MonitorDBStore::Transaction t;
631 vector<string> prefixes = {"auth", "osdmap",
632 "mgr", "mgr_command_desc",
633 "pgmap", "pgmap_pg", "pgmap_meta"};
634 for (const auto& prefix : prefixes) {
635 for (auto i = st.get_iterator(prefix); i->valid(); i->next()) {
636 auto key = i->raw_key();
637 auto val = i->value();
638 t.put(key.first, key.second, val);
641 t.encode(pending_proposal);
643 const string prefix("paxos");
644 auto t = make_shared<MonitorDBStore::Transaction>();
645 t->put(prefix, "first_committed", 0);
646 t->put(prefix, "last_committed", 0);
648 t->put(prefix, pending_v, pending_proposal);
649 t->put(prefix, "pending_v", pending_v);
650 t->put(prefix, "pending_pn", 400);
651 st.apply_transaction(t);
656 // - pgmap_meta/version
657 // - pgmap_meta/last_osdmap_epoch
658 // - pgmap_meta/last_pg_scan
659 // - pgmap_meta/full_ratio
660 // - pgmap_meta/nearfull_ratio
661 // - pgmap_meta/stamp
662 static int update_pgmap_meta(MonitorDBStore& st)
664 const string prefix("pgmap_meta");
665 auto t = make_shared<MonitorDBStore::Transaction>();
666 // stolen from PGMonitor::create_pending()
667 // the first pgmap_meta
668 t->put(prefix, "version", 1);
670 auto stamp = ceph_clock_now();
673 t->put(prefix, "stamp", bl);
676 auto last_osdmap_epoch = st.get("osdmap", "last_committed");
677 t->put(prefix, "last_osdmap_epoch", last_osdmap_epoch);
679 // be conservative, so PGMonitor will scan all the pools for pg changes
680 t->put(prefix, "last_pg_scan", 1);
682 auto full_ratio = g_ceph_context->_conf->mon_osd_full_ratio;
683 if (full_ratio > 1.0)
686 ::encode(full_ratio, bl);
687 t->put(prefix, "full_ratio", bl);
690 auto backfillfull_ratio = g_ceph_context->_conf->mon_osd_backfillfull_ratio;
691 if (backfillfull_ratio > 1.0)
692 backfillfull_ratio /= 100.0;
694 ::encode(backfillfull_ratio, bl);
695 t->put(prefix, "backfillfull_ratio", bl);
698 auto nearfull_ratio = g_ceph_context->_conf->mon_osd_nearfull_ratio;
699 if (nearfull_ratio > 1.0)
700 nearfull_ratio /= 100.0;
702 ::encode(nearfull_ratio, bl);
703 t->put(prefix, "nearfull_ratio", bl);
705 st.apply_transaction(t);
709 int rebuild_monstore(const char* progname,
710 vector<string>& subcmds,
713 po::options_description op_desc("Allowed 'rebuild' options");
715 op_desc.add_options()
716 ("keyring", po::value<string>(&keyring_path),
717 "path to the client.admin key");
718 po::variables_map op_vm;
719 int r = parse_cmd_args(&op_desc, nullptr, nullptr, subcmds, &op_vm);
723 if (op_vm.count("help")) {
724 usage(progname, op_desc);
727 if (!keyring_path.empty())
728 update_auth(st, keyring_path);
729 if ((r = update_pgmap_meta(st))) {
732 if ((r = update_paxos(st))) {
735 if ((r = update_mkfs(st))) {
738 if ((r = update_monitor(st))) {
741 if ((r = update_mgrmap(st))) {
747 int main(int argc, char **argv) {
749 po::options_description desc("Allowed options");
750 string store_path, cmd;
751 vector<string> subcmds;
753 ("help,h", "produce help message")
756 /* Dear Future Developer:
758 * for further improvement, should you need to pass specific options to
759 * a command (e.g., get osdmap VER --hex), you can expand the current
760 * format by creating additional 'po::option_description' and passing
761 * 'subcmds' to 'po::command_line_parser', much like what is currently
762 * done by default. However, beware: in order to differentiate a
763 * command-specific option from the generic/global options, you will need
764 * to pass '--' in the command line (so that the first parser, the one
765 * below, assumes it has reached the end of all options); e.g.,
766 * 'get osdmap VER -- --hex'. Not pretty; far from intuitive; it was as
767 * far as I got with this library. Improvements on this format will be
768 * left as an exercise for the reader. -Joao
770 po::options_description positional_desc("Positional argument options");
771 positional_desc.add_options()
772 ("store-path", po::value<string>(&store_path),
773 "path to monitor's store")
774 ("command", po::value<string>(&cmd),
776 ("subcmd", po::value<vector<string> >(&subcmds),
777 "Command arguments/Sub-Commands")
779 po::positional_options_description positional;
780 positional.add("store-path", 1);
781 positional.add("command", 1);
782 positional.add("subcmd", -1);
784 po::options_description all_desc("All options");
785 all_desc.add(desc).add(positional_desc);
787 vector<string> ceph_option_strings;
788 po::variables_map vm;
790 po::parsed_options parsed =
791 po::command_line_parser(argc, argv).
793 positional(positional).
794 allow_unregistered().run();
801 // Specifying po::include_positional would have our positional arguments
802 // being collected (thus being part of ceph_option_strings and eventually
803 // passed on to global_init() below).
804 // Instead we specify po::exclude_positional, which has the upside of
805 // completely avoiding this, but the downside of having to specify ceph
806 // options as --VAR=VAL (note the '='); otherwise we will capture the
807 // positional 'VAL' as belonging to us, never being collected.
808 ceph_option_strings = po::collect_unrecognized(parsed.options,
809 po::exclude_positional);
811 } catch(po::error &e) {
812 std::cerr << "error: " << e.what() << std::endl;
816 // parse command structure before calling global_init() and friends.
818 if (vm.empty() || vm.count("help") ||
819 store_path.empty() || cmd.empty() ||
820 *cmd.begin() == '-') {
821 usage(argv[0], desc);
825 vector<const char *> ceph_options, def_args;
826 ceph_options.reserve(ceph_option_strings.size());
827 for (vector<string>::iterator i = ceph_option_strings.begin();
828 i != ceph_option_strings.end();
830 ceph_options.push_back(i->c_str());
833 auto cct = global_init(
834 &def_args, ceph_options, CEPH_ENTITY_TYPE_MON,
835 CODE_ENVIRONMENT_UTILITY, 0);
836 common_init_finish(g_ceph_context);
837 g_ceph_context->_conf->apply_changes(NULL);
838 g_conf = g_ceph_context->_conf;
840 // this is where we'll write *whatever*, on a per-command basis.
841 // not all commands require some place to write their things.
842 MonitorDBStore st(store_path);
843 if (store_path.size()) {
847 std::cerr << ss.str() << std::endl;
852 if (cmd == "dump-keys") {
853 KeyValueDB::WholeSpaceIterator iter = st.get_iterator();
854 while (iter->valid()) {
855 pair<string,string> key(iter->raw_key());
856 cout << key.first << " / " << key.second << std::endl;
859 } else if (cmd == "compact") {
861 } else if (cmd == "get") {
864 bool readable = false;
866 // visible options for this command
867 po::options_description op_desc("Allowed 'get' options");
868 op_desc.add_options()
869 ("help,h", "produce this help message")
870 ("out,o", po::value<string>(&outpath),
871 "output file (default: stdout)")
872 ("version,v", po::value<unsigned>(&v),
873 "map version to obtain")
874 ("readable,r", po::value<bool>(&readable)->default_value(false),
875 "print the map infomation in human readable format")
877 // this is going to be a positional argument; we don't want to show
878 // it as an option during --help, but we do want to have it captured
880 po::options_description hidden_op_desc("Hidden 'get' options");
881 hidden_op_desc.add_options()
882 ("map-type", po::value<string>(&map_type),
885 po::positional_options_description op_positional;
886 op_positional.add("map-type", 1);
888 po::variables_map op_vm;
889 int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional,
896 if (op_vm.count("help") || map_type.empty()) {
897 usage(argv[0], op_desc);
903 if (map_type == "crushmap") {
904 v = st.get("osdmap", "last_committed");
906 v = st.get(map_type, "last_committed");
910 int fd = STDOUT_FILENO;
911 if (!outpath.empty()){
912 fd = ::open(outpath.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
914 std::cerr << "error opening output file: "
915 << cpp_strerror(errno) << std::endl;
921 BOOST_SCOPE_EXIT((&r) (&fd) (&outpath)) {
923 if (r < 0 && fd != STDOUT_FILENO) {
924 ::remove(outpath.c_str());
926 } BOOST_SCOPE_EXIT_END
930 if (map_type == "osdmap") {
931 r = st.get(map_type, st.combine_strings("full", v), bl);
932 } else if (map_type == "crushmap") {
934 r = st.get("osdmap", st.combine_strings("full", v), tmp);
938 osdmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT);
941 r = st.get(map_type, v, bl);
944 std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl;
952 if (map_type == "monmap") {
956 } else if (map_type == "osdmap") {
960 } else if (map_type == "mdsmap") {
964 } else if (map_type == "crushmap") {
966 bufferlist::iterator it = bl.begin();
968 CrushCompiler cc(cw, std::cerr, 0);
971 std::cerr << "This type of readable map does not exist: " << map_type << std::endl
972 << "You can only specify[osdmap|monmap|mdsmap|crushmap]" << std::endl;
980 if (!outpath.empty()) {
981 std::cout << "wrote " << map_type
982 << " version " << v << " to " << outpath
985 } else if (cmd == "show-versions") {
986 string map_type; //map type:osdmap,monmap...
987 // visible options for this command
988 po::options_description op_desc("Allowed 'show-versions' options");
989 op_desc.add_options()
990 ("help,h", "produce this help message")
991 ("map-type", po::value<string>(&map_type), "map_type");
993 po::positional_options_description op_positional;
994 op_positional.add("map-type", 1);
996 po::variables_map op_vm;
997 int r = parse_cmd_args(&op_desc, NULL, &op_positional,
1004 if (op_vm.count("help") || map_type.empty()) {
1005 usage(argv[0], op_desc);
1010 unsigned int v_first = 0;
1011 unsigned int v_last = 0;
1012 v_first = st.get(map_type, "first_committed");
1013 v_last = st.get(map_type, "last_committed");
1015 std::cout << "first committed:\t" << v_first << "\n"
1016 << "last committed:\t" << v_last << std::endl;
1017 } else if (cmd == "dump-paxos") {
1018 unsigned dstart = 0;
1019 unsigned dstop = ~0;
1020 po::options_description op_desc("Allowed 'dump-paxos' options");
1021 op_desc.add_options()
1022 ("help,h", "produce this help message")
1023 ("start,s", po::value<unsigned>(&dstart),
1024 "starting version (default: 0)")
1025 ("end,e", po::value<unsigned>(&dstop),
1026 "finish version (default: ~0)")
1029 po::variables_map op_vm;
1030 int r = parse_cmd_args(&op_desc, NULL, NULL,
1037 if (op_vm.count("help")) {
1038 usage(argv[0], op_desc);
1043 if (dstart > dstop) {
1044 std::cerr << "error: 'start' version (value: " << dstart << ") "
1045 << " is greater than 'end' version (value: " << dstop << ")"
1051 version_t v = dstart;
1052 for (; v <= dstop; ++v) {
1054 st.get("paxos", v, bl);
1055 if (bl.length() == 0)
1057 cout << "\n--- " << v << " ---" << std::endl;
1058 auto tx(std::make_shared<MonitorDBStore::Transaction>());
1059 Paxos::decode_append_transaction(tx, bl);
1060 JSONFormatter f(true);
1065 std::cout << "dumped " << v << " paxos versions" << std::endl;
1067 } else if (cmd == "dump-trace") {
1068 unsigned dstart = 0;
1069 unsigned dstop = ~0;
1072 // visible options for this command
1073 po::options_description op_desc("Allowed 'dump-trace' options");
1074 op_desc.add_options()
1075 ("help,h", "produce this help message")
1076 ("start,s", po::value<unsigned>(&dstart),
1077 "starting version (default: 0)")
1078 ("end,e", po::value<unsigned>(&dstop),
1079 "finish version (default: ~0)")
1081 // this is going to be a positional argument; we don't want to show
1082 // it as an option during --help, but we do want to have it captured
1084 po::options_description hidden_op_desc("Hidden 'dump-trace' options");
1085 hidden_op_desc.add_options()
1086 ("out,o", po::value<string>(&outpath),
1087 "file to write the dump to")
1089 po::positional_options_description op_positional;
1090 op_positional.add("out", 1);
1092 po::variables_map op_vm;
1093 int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional,
1100 if (op_vm.count("help")) {
1101 usage(argv[0], op_desc);
1106 if (outpath.empty()) {
1107 usage(argv[0], op_desc);
1112 if (dstart > dstop) {
1113 std::cerr << "error: 'start' version (value: " << dstart << ") "
1114 << " is greater than 'stop' version (value: " << dstop << ")"
1120 TraceIter iter(outpath.c_str());
1125 if (iter.num() >= dstop) {
1128 if (iter.num() >= dstart) {
1129 JSONFormatter f(true);
1130 iter.cur()->dump(&f, false);
1132 std::cout << std::endl;
1136 std::cerr << "Read up to transaction " << iter.num() << std::endl;
1137 } else if (cmd == "replay-trace") {
1139 unsigned num_replays = 1;
1140 // visible options for this command
1141 po::options_description op_desc("Allowed 'replay-trace' options");
1142 op_desc.add_options()
1143 ("help,h", "produce this help message")
1144 ("num-replays,n", po::value<unsigned>(&num_replays),
1145 "finish version (default: 1)")
1147 // this is going to be a positional argument; we don't want to show
1148 // it as an option during --help, but we do want to have it captured
1150 po::options_description hidden_op_desc("Hidden 'replay-trace' options");
1151 hidden_op_desc.add_options()
1152 ("in,i", po::value<string>(&inpath),
1153 "file to write the dump to")
1155 po::positional_options_description op_positional;
1156 op_positional.add("in", 1);
1158 // op_desc_all will aggregate all visible and hidden options for parsing.
1159 // when we call 'usage()' we just pass 'op_desc', as that's the description
1160 // holding the visible options.
1161 po::options_description op_desc_all;
1162 op_desc_all.add(op_desc).add(hidden_op_desc);
1164 po::variables_map op_vm;
1166 po::parsed_options op_parsed = po::command_line_parser(subcmds).
1167 options(op_desc_all).positional(op_positional).run();
1168 po::store(op_parsed, op_vm);
1170 } catch (po::error &e) {
1171 std::cerr << "error: " << e.what() << std::endl;
1176 if (op_vm.count("help")) {
1177 usage(argv[0], op_desc);
1182 if (inpath.empty()) {
1183 usage(argv[0], op_desc);
1189 for (unsigned i = 0; i < num_replays; ++i) {
1190 TraceIter iter(inpath.c_str());
1195 std::cerr << "Replaying trans num " << num << std::endl;
1196 st.apply_transaction(iter.cur());
1200 std::cerr << "Read up to transaction " << iter.num() << std::endl;
1202 } else if (cmd == "random-gen") {
1203 unsigned tsize = 200;
1204 unsigned tvalsize = 1024;
1205 unsigned ntrans = 100;
1206 po::options_description op_desc("Allowed 'random-gen' options");
1207 op_desc.add_options()
1208 ("help,h", "produce this help message")
1209 ("num-keys,k", po::value<unsigned>(&tsize),
1210 "keys to write in each transaction (default: 200)")
1211 ("size,s", po::value<unsigned>(&tvalsize),
1212 "size (in bytes) of the value to write in each key (default: 1024)")
1213 ("ntrans,n", po::value<unsigned>(&ntrans),
1214 "number of transactions to run (default: 100)")
1217 po::variables_map op_vm;
1219 po::parsed_options op_parsed = po::command_line_parser(subcmds).
1220 options(op_desc).run();
1221 po::store(op_parsed, op_vm);
1223 } catch (po::error &e) {
1224 std::cerr << "error: " << e.what() << std::endl;
1229 if (op_vm.count("help")) {
1230 usage(argv[0], op_desc);
1236 for (unsigned i = 0; i < ntrans; ++i) {
1237 std::cerr << "Applying trans " << i << std::endl;
1238 auto t(std::make_shared<MonitorDBStore::Transaction>());
1240 prefix.push_back((i%26)+'a');
1241 for (unsigned j = 0; j < tsize; ++j) {
1245 for (unsigned k = 0; k < tvalsize; ++k) bl.append(rand());
1246 t->put(prefix, os.str(), bl);
1249 t->compact_prefix(prefix);
1250 st.apply_transaction(t);
1252 } else if (cmd == "store-copy") {
1253 if (subcmds.size() < 1 || subcmds[0].empty()) {
1254 usage(argv[0], desc);
1259 string out_path = subcmds[0];
1261 MonitorDBStore out_store(out_path);
1264 int r = out_store.create_and_open(ss);
1266 std::cerr << ss.str() << std::endl;
1272 KeyValueDB::WholeSpaceIterator it = st.get_iterator();
1273 uint64_t total_keys = 0;
1274 uint64_t total_size = 0;
1275 uint64_t total_tx = 0;
1278 uint64_t num_keys = 0;
1280 auto tx(std::make_shared<MonitorDBStore::Transaction>());
1282 while (it->valid() && num_keys < 128) {
1283 pair<string,string> k = it->raw_key();
1284 bufferlist v = it->value();
1285 tx->put(k.first, k.second, v);
1289 total_size += v.length();
1294 total_keys += num_keys;
1297 out_store.apply_transaction(tx);
1299 std::cout << "copied " << total_keys << " keys so far ("
1300 << stringify(si_t(total_size)) << ")" << std::endl;
1302 } while (it->valid());
1304 std::cout << "summary: copied " << total_keys << " keys, using "
1305 << total_tx << " transactions, totalling "
1306 << stringify(si_t(total_size)) << std::endl;
1307 std::cout << "from '" << store_path << "' to '" << out_path << "'"
1309 } else if (cmd == "rewrite-crush") {
1310 err = rewrite_crush(argv[0], subcmds, st);
1311 } else if (cmd == "inflate-pgmap") {
1313 bool can_be_trimmed = false;
1314 po::options_description op_desc("Allowed 'inflate-pgmap' options");
1315 op_desc.add_options()
1316 ("num-maps,n", po::value<unsigned>(&n),
1317 "number of maps to add (default: 2000)")
1318 ("can-be-trimmed", po::value<bool>(&can_be_trimmed),
1319 "can be trimmed (default: false)")
1322 po::variables_map op_vm;
1324 po::parsed_options op_parsed = po::command_line_parser(subcmds).
1325 options(op_desc).run();
1326 po::store(op_parsed, op_vm);
1328 } catch (po::error &e) {
1329 std::cerr << "error: " << e.what() << std::endl;
1333 err = inflate_pgmap(st, n, can_be_trimmed);
1334 } else if (cmd == "rebuild") {
1335 err = rebuild_monstore(argv[0], subcmds, st);
1337 std::cerr << "Unrecognized command: " << cmd << std::endl;
1338 usage(argv[0], desc);