X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Ftools%2Fceph_monstore_tool.cc;fp=src%2Fceph%2Fsrc%2Ftools%2Fceph_monstore_tool.cc;h=bf607ffa9c415f44151874c6e88f99689f533f77;hb=812ff6ca9fcd3e629e49d4328905f33eee8ca3f5;hp=0000000000000000000000000000000000000000;hpb=15280273faafb77777eab341909a3f495cf248d9;p=stor4nfv.git diff --git a/src/ceph/src/tools/ceph_monstore_tool.cc b/src/ceph/src/tools/ceph_monstore_tool.cc new file mode 100644 index 0000000..bf607ff --- /dev/null +++ b/src/ceph/src/tools/ceph_monstore_tool.cc @@ -0,0 +1,1345 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* +* Ceph - scalable distributed file system +* +* Copyright (C) 2012 Inktank, Inc. +* +* This is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License version 2.1, as published by the Free Software +* Foundation. See file COPYING. +*/ +#include +#include +#include + +#include +#include + +#include "common/Formatter.h" +#include "common/errno.h" + +#include "auth/KeyRing.h" +#include "auth/cephx/CephxKeyServer.h" +#include "global/global_init.h" +#include "include/stringify.h" +#include "mgr/mgr_commands.h" +#include "mon/AuthMonitor.h" +#include "mon/MonitorDBStore.h" +#include "mon/Paxos.h" +#include "mon/MonMap.h" +#include "mds/MDSMap.h" +#include "osd/OSDMap.h" +#include "crush/CrushCompiler.h" + +namespace po = boost::program_options; +using namespace std; + +class TraceIter { + int fd; + unsigned idx; + MonitorDBStore::TransactionRef t; +public: + explicit TraceIter(string fname) : fd(-1), idx(-1) { + fd = ::open(fname.c_str(), O_RDONLY); + t.reset(new MonitorDBStore::Transaction); + } + bool valid() { + return fd != -1; + } + MonitorDBStore::TransactionRef cur() { + assert(valid()); + return t; + } + unsigned num() { return idx; } + void next() { + ++idx; + bufferlist bl; + int r = bl.read_fd(fd, 6); + if (r < 0) { + std::cerr << "Got error: " << cpp_strerror(r) << " on read_fd" + << std::endl; + ::close(fd); + fd = -1; + return; + } else if ((unsigned)r < 6) { + std::cerr << "short read" << std::endl; + ::close(fd); + fd = -1; + return; + } + bufferlist::iterator bliter = bl.begin(); + uint8_t ver, ver2; + ::decode(ver, bliter); + ::decode(ver2, bliter); + uint32_t len; + ::decode(len, bliter); + r = bl.read_fd(fd, len); + if (r < 0) { + std::cerr << "Got error: " << cpp_strerror(r) << " on read_fd" + << std::endl; + ::close(fd); + fd = -1; + return; + } else if ((unsigned)r < len) { + std::cerr << "short read" << std::endl; + ::close(fd); + fd = -1; + return; + } + bliter = bl.begin(); + t.reset(new MonitorDBStore::Transaction); + t->decode(bliter); + } + void init() { + next(); + } + ~TraceIter() { + if (fd != -1) { + ::close(fd); + fd = -1; + } + } +}; + + +int parse_cmd_args( + po::options_description *desc, /// < visible options description + po::options_description *hidden_desc, /// < hidden options description + po::positional_options_description *positional, /// < positional args + vector &cmd_args, /// < arguments to be parsed + po::variables_map *vm /// > post-parsing variable map + ) +{ + // desc_all will aggregate all visible and hidden options for parsing. + // + // From boost's program_options point of view, there is absolutely no + // distinction between 'desc' and 'hidden_desc'. This is a distinction + // that is only useful to us: 'desc' is whatever we are willing to show + // on 'usage()', whereas 'hidden_desc' refers to parameters we wish to + // take advantage of but do not wish to show on 'usage()'. + // + // For example, consider that program_options matches positional arguments + // (specified via 'positional') against the paramenters defined on a + // given 'po::options_description' class. This is performed below, + // supplying both the description and the positional arguments to the + // parser. However, we do not want the parameters that are mapped to + // positional arguments to be shown on usage, as that makes for ugly and + // confusing usage messages. Therefore we dissociate the options' + // description that is to be used as an aid to the user from those options + // that are nothing but useful for internal purposes (i.e., mapping options + // to positional arguments). We still need to aggregate them before parsing + // and that's what 'desc_all' is all about. + // + + assert(desc != NULL); + + po::options_description desc_all; + desc_all.add(*desc); + if (hidden_desc != NULL) + desc_all.add(*hidden_desc); + + try { + po::command_line_parser parser = po::command_line_parser(cmd_args). + options(desc_all); + + if (positional) { + parser = parser.positional(*positional); + } + + po::parsed_options parsed = parser.run(); + po::store(parsed, *vm); + po::notify(*vm); + } catch (po::error &e) { + std::cerr << "error: " << e.what() << std::endl; + return -EINVAL; + } + return 0; +} + + +/** + * usage: ceph-monstore-tool [options] + * + * commands: + * + * store-copy < --out arg > + * dump-keys + * compact + * getmonmap < --out arg [ --version arg ] > + * getosdmap < --out arg [ --version arg ] > + * dump-paxos <--dump-start VER> <--dump-end VER> + * dump-trace < --trace-file arg > + * replay-trace + * random-gen + * rewrite-crush + * inflate-pgmap + * + * wanted syntax: + * + * ceph-monstore-tool PATH CMD [options] + * + * ceph-monstore-tool PATH store-copy + * ceph-monstore-tool PATH dump-keys + * ceph-monstore-tool PATH compact + * ceph-monstore-tool PATH get monmap [VER] + * ceph-monstore-tool PATH get osdmap [VER] + * ceph-monstore-tool PATH dump-paxos STARTVER ENDVER + * + * + */ +void usage(const char *n, po::options_description &d) +{ + std::cerr << + "usage: " << n << " [args|options]\n" + << "\n" + << "Commands:\n" + << " store-copy PATH copies store to PATH\n" + << " compact compacts the store\n" + << " get monmap [-- options] get monmap (version VER if specified)\n" + << " (default: last committed)\n" + << " get osdmap [-- options] get osdmap (version VER if specified)\n" + << " (default: last committed)\n" + << " get mdsmap [-- options] get mdsmap (version VER if specified)\n" + << " (default: last committed)\n" + << " get crushmap [-- options] get crushmap (version VER if specified)\n" + << " (default: last committed)\n" + << " show-versions [-- options] show the first&last committed version of map\n" + << " (show-versions -- --help for more info)\n" + << " dump-keys dumps store keys to FILE\n" + << " (default: stdout)\n" + << " dump-paxos [-- options] dump paxos transactions\n" + << " (dump-paxos -- --help for more info)\n" + << " dump-trace FILE [-- options] dump contents of trace file FILE\n" + << " (dump-trace -- --help for more info)\n" + << " replay-trace FILE [-- options] replay trace from FILE\n" + << " (replay-trace -- --help for more info)\n" + << " random-gen [-- options] add randomly generated ops to the store\n" + << " (random-gen -- --help for more info)\n" + << " rewrite-crush [-- options] add a rewrite commit to the store\n" + << " (rewrite-crush -- --help for more info)\n" + << " inflate-pgmap [-- options] add given number of pgmaps to store\n" + << " (inflate-pgmap -- --help for more info)\n" + << " rebuild rebuild store\n" + << " (rebuild -- --help for more info)\n" + << std::endl; + std::cerr << d << std::endl; + std::cerr + << "\nPlease Note:\n" + << "* Ceph-specific options should be in the format --option-name=VAL\n" + << " (specifically, do not forget the '='!!)\n" + << "* Command-specific options need to be passed after a '--'\n" + << " e.g., 'get monmap -- --version 10 --out /tmp/foo'" + << std::endl; +} + +int update_osdmap(MonitorDBStore& store, version_t ver, bool copy, + ceph::shared_ptr crush, + MonitorDBStore::Transaction* t) { + const string prefix("osdmap"); + + // full + bufferlist bl; + int r = 0; + r = store.get(prefix, store.combine_strings("full", ver), bl); + if (r) { + std::cerr << "Error getting full map: " << cpp_strerror(r) << std::endl; + return r; + } + OSDMap osdmap; + osdmap.decode(bl); + osdmap.crush = crush; + if (copy) { + osdmap.inc_epoch(); + } + bl.clear(); + // be consistent with OSDMonitor::update_from_paxos() + osdmap.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED); + t->put(prefix, store.combine_strings("full", osdmap.get_epoch()), bl); + + // incremental + OSDMap::Incremental inc; + if (copy) { + inc.epoch = osdmap.get_epoch(); + inc.fsid = osdmap.get_fsid(); + } else { + bl.clear(); + r = store.get(prefix, ver, bl); + if (r) { + std::cerr << "Error getting inc map: " << cpp_strerror(r) << std::endl; + return r; + } + OSDMap::Incremental inc(bl); + if (inc.crush.length()) { + inc.crush.clear(); + crush->encode(inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + } + if (inc.fullmap.length()) { + OSDMap fullmap; + fullmap.decode(inc.fullmap); + fullmap.crush = crush; + inc.fullmap.clear(); + fullmap.encode(inc.fullmap); + } + } + assert(osdmap.have_crc()); + inc.full_crc = osdmap.get_crc(); + bl.clear(); + // be consistent with OSDMonitor::update_from_paxos() + inc.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED); + t->put(prefix, inc.epoch, bl); + return 0; +} + +int rewrite_transaction(MonitorDBStore& store, int version, + const string& crush_file, + MonitorDBStore::Transaction* t) { + const string prefix("osdmap"); + + // calc the known-good epoch + version_t last_committed = store.get(prefix, "last_committed"); + version_t good_version = 0; + if (version <= 0) { + if (last_committed >= (unsigned)-version) { + good_version = last_committed + version; + } else { + std::cerr << "osdmap-version is less than: -" << last_committed << std::endl; + return EINVAL; + } + } else { + good_version = version; + } + if (good_version >= last_committed) { + std::cout << "good epoch is greater or equal to the last committed one: " + << good_version << " >= " << last_committed << std::endl; + return 0; + } + + // load/extract the crush map + int r = 0; + ceph::shared_ptr crush(new CrushWrapper); + if (crush_file.empty()) { + bufferlist bl; + r = store.get(prefix, store.combine_strings("full", good_version), bl); + if (r) { + std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl; + return r; + } + OSDMap osdmap; + osdmap.decode(bl); + crush = osdmap.crush; + } else { + string err; + bufferlist bl; + r = bl.read_file(crush_file.c_str(), &err); + if (r) { + std::cerr << err << ": " << cpp_strerror(r) << std::endl; + return r; + } + bufferlist::iterator p = bl.begin(); + crush->decode(p); + } + + // prepare a transaction to rewrite the epochs + // (good_version, last_committed] + // with the good crush map. + // XXX: may need to break this into several paxos versions? + assert(good_version < last_committed); + for (version_t v = good_version + 1; v <= last_committed; v++) { + cout << "rewriting epoch #" << v << "/" << last_committed << std::endl; + r = update_osdmap(store, v, false, crush, t); + if (r) + return r; + } + + // add a new osdmap epoch to store, so monitors will update their current osdmap + // in addition to the ones stored in epochs. + // + // This is needed due to the way the monitor updates from paxos and the + // facilities we are leveraging to push this update to the rest of the + // quorum. + // + // In a nutshell, we are generating a good version of the osdmap, with a + // proper crush, and building a transaction that will replace the bad + // osdmaps with good osdmaps. But this transaction needs to be applied on + // all nodes, so that the monitors will have good osdmaps to share with + // clients. We thus leverage Paxos, specifically the recovery mechanism, by + // creating a pending value that will be committed once the monitors form an + // initial quorum after being brought back to life. + // + // However, the way the monitor works has the paxos services, including the + // OSDMonitor, updating their state from disk *prior* to the recovery phase + // begins (so they have an up to date state in memory). This means the + // OSDMonitor will see the old, broken map, before the new paxos version is + // applied to disk, and the old version is cached. Even though we have the + // good map now, and we share the good map with clients, we will still be + // working on the old broken map. Instead of mucking around the monitor to + // make this work, we instead opt for adding the same osdmap but with a + // newer version, so that the OSDMonitor picks up on it when it updates from + // paxos after the proposal has been committed. This is not elegant, but + // avoids further unpleasantness that would arise from kludging around the + // current behavior. Also, has the added benefit of making sure the clients + // get an updated version of the map (because last_committed+1 > + // last_committed) :) + // + cout << "adding a new epoch #" << last_committed+1 << std::endl; + r = update_osdmap(store, last_committed++, true, crush, t); + if (r) + return r; + t->put(prefix, store.combine_strings("full", "latest"), last_committed); + t->put(prefix, "last_committed", last_committed); + return 0; +} + +/** + * create a new paxos version which carries a proposal to rewrite all epochs + * of incremental and full map of "osdmap" after a faulty crush map is injected. + * so the leader will trigger a recovery and propagate this fix to its peons, + * after the proposal is accepted, and the transaction in it is applied. all + * monitors will rewrite the bad crush map with the good one, and have a new + * osdmap epoch with the good crush map in it. + */ +int rewrite_crush(const char* progname, + vector& subcmds, + MonitorDBStore& store) { + po::options_description op_desc("Allowed 'rewrite-crush' options"); + int version = -1; + string crush_file; + op_desc.add_options() + ("help,h", "produce this help message") + ("crush", po::value(&crush_file), + ("path to the crush map file " + "(default: will instead extract it from the known-good osdmap)")) + ("good-epoch", po::value(&version), + "known-good epoch of osdmap, if a negative number '-N' is given, the " + "$last_committed-N is used instead (default: -1). " + "Please note, -1 is not necessarily a good epoch, because there are " + "good chance that we have more epochs slipped into the monstore after " + "the one where the crushmap is firstly injected.") + ; + po::variables_map op_vm; + int r = parse_cmd_args(&op_desc, NULL, NULL, subcmds, &op_vm); + if (r) { + return -r; + } + if (op_vm.count("help")) { + usage(progname, op_desc); + return 0; + } + + MonitorDBStore::Transaction rewrite_txn; + r = rewrite_transaction(store, version, crush_file, &rewrite_txn); + if (r) { + return r; + } + + // store the transaction into store as a proposal + const string prefix("paxos"); + version_t pending_v = store.get(prefix, "last_committed") + 1; + auto t(std::make_shared()); + bufferlist bl; + rewrite_txn.encode(bl); + cout << "adding pending commit " << pending_v + << " " << bl.length() << " bytes" << std::endl; + t->put(prefix, pending_v, bl); + t->put(prefix, "pending_v", pending_v); + // a large enough yet unique proposal number will probably do the trick + version_t pending_pn = (store.get(prefix, "accepted_pn") / 100 + 4) * 100 + 1; + t->put(prefix, "pending_pn", pending_pn); + store.apply_transaction(t); + return 0; +} + +int inflate_pgmap(MonitorDBStore& st, unsigned n, bool can_be_trimmed) { + // put latest pg map into monstore to bloat it up + // only format version == 1 is supported + version_t last = st.get("pgmap", "last_committed"); + bufferlist bl; + + // get the latest delta + int r = st.get("pgmap", last, bl); + if (r) { + std::cerr << "Error getting pgmap: " << cpp_strerror(r) << std::endl; + return r; + } + + // try to pull together an idempotent "delta" + ceph::unordered_map pg_stat; + for (KeyValueDB::Iterator i = st.get_iterator("pgmap_pg"); + i->valid(); i->next()) { + pg_t pgid; + if (!pgid.parse(i->key().c_str())) { + std::cerr << "unable to parse key " << i->key() << std::endl; + continue; + } + bufferlist pg_bl = i->value(); + pg_stat_t ps; + bufferlist::iterator p = pg_bl.begin(); + ::decode(ps, p); + // will update the last_epoch_clean of all the pgs. + pg_stat[pgid] = ps; + } + + version_t first = st.get("pgmap", "first_committed"); + version_t ver = last; + auto txn(std::make_shared()); + for (unsigned i = 0; i < n; i++) { + bufferlist trans_bl; + bufferlist dirty_pgs; + for (ceph::unordered_map::iterator ps = pg_stat.begin(); + ps != pg_stat.end(); ++ps) { + ::encode(ps->first, dirty_pgs); + if (!can_be_trimmed) { + ps->second.last_epoch_clean = first; + } + ::encode(ps->second, dirty_pgs); + } + utime_t inc_stamp = ceph_clock_now(); + ::encode(inc_stamp, trans_bl); + ::encode_destructively(dirty_pgs, trans_bl); + bufferlist dirty_osds; + ::encode(dirty_osds, trans_bl); + txn->put("pgmap", ++ver, trans_bl); + // update the db in batch + if (txn->size() > 1024) { + st.apply_transaction(txn); + // reset the transaction + txn.reset(new MonitorDBStore::Transaction); + } + } + txn->put("pgmap", "last_committed", ver); + txn->put("pgmap_meta", "version", ver); + // this will also piggy back the leftover pgmap added in the loop above + st.apply_transaction(txn); + return 0; +} + +static int update_auth(MonitorDBStore& st, const string& keyring_path) +{ + // import all keyrings stored in the keyring file + KeyRing keyring; + int r = keyring.load(g_ceph_context, keyring_path); + if (r < 0) { + cerr << "unable to load admin keyring: " << keyring_path << std::endl; + return r; + } + + bufferlist bl; + __u8 v = 1; + ::encode(v, bl); + + for (const auto& k : keyring.get_keys()) { + KeyServerData::Incremental auth_inc; + auth_inc.name = k.first; + auth_inc.auth = k.second; + if (auth_inc.auth.caps.empty()) { + cerr << "no caps granted to: " << auth_inc.name << std::endl; + return -EINVAL; + } + auth_inc.op = KeyServerData::AUTH_INC_ADD; + + AuthMonitor::Incremental inc; + inc.inc_type = AuthMonitor::AUTH_DATA; + ::encode(auth_inc, inc.auth_data); + inc.auth_type = CEPH_AUTH_CEPHX; + + inc.encode(bl, CEPH_FEATURES_ALL); + } + + const string prefix("auth"); + auto last_committed = st.get(prefix, "last_committed") + 1; + auto t = make_shared(); + t->put(prefix, last_committed, bl); + t->put(prefix, "last_committed", last_committed); + auto first_committed = st.get(prefix, "first_committed"); + if (!first_committed) { + t->put(prefix, "first_committed", last_committed); + } + st.apply_transaction(t); + return 0; +} + +static int update_mkfs(MonitorDBStore& st) +{ + MonMap monmap; + int r = monmap.build_initial(g_ceph_context, cerr); + if (r) { + cerr << "no initial monitors" << std::endl; + return -EINVAL; + } + bufferlist bl; + monmap.encode(bl, CEPH_FEATURES_ALL); + monmap.set_epoch(0); + auto t = make_shared(); + t->put("mkfs", "monmap", bl); + st.apply_transaction(t); + return 0; +} + +static int update_monitor(MonitorDBStore& st) +{ + const string prefix("monitor"); + // a stripped-down Monitor::mkfs() + bufferlist bl; + bl.append(CEPH_MON_ONDISK_MAGIC "\n"); + auto t = make_shared(); + t->put(prefix, "magic", bl); + st.apply_transaction(t); + return 0; +} + +static int update_mgrmap(MonitorDBStore& st) +{ + auto t = make_shared(); + + { + MgrMap map; + // mgr expects epoch > 1 + map.epoch++; + auto initial_modules = + get_str_vec(g_ceph_context->_conf->get_val("mgr_initial_modules")); + copy(begin(initial_modules), + end(initial_modules), + inserter(map.modules, end(map.modules))); + bufferlist bl; + map.encode(bl, CEPH_FEATURES_ALL); + t->put("mgr", map.epoch, bl); + t->put("mgr", "last_committed", map.epoch); + } + { + auto mgr_command_descs = mgr_commands; + for (auto& c : mgr_command_descs) { + c.set_flag(MonCommand::FLAG_MGR); + } + bufferlist bl; + ::encode(mgr_command_descs, bl); + t->put("mgr_command_desc", "", bl); + } + return st.apply_transaction(t); +} + +static int update_paxos(MonitorDBStore& st) +{ + // build a pending paxos proposal from all non-permanent k/v pairs. once the + // proposal is committed, it will gets applied. on the sync provider side, it + // will be a no-op, but on its peers, the paxos commit will help to build up + // the necessary epochs. + bufferlist pending_proposal; + { + MonitorDBStore::Transaction t; + vector prefixes = {"auth", "osdmap", + "mgr", "mgr_command_desc", + "pgmap", "pgmap_pg", "pgmap_meta"}; + for (const auto& prefix : prefixes) { + for (auto i = st.get_iterator(prefix); i->valid(); i->next()) { + auto key = i->raw_key(); + auto val = i->value(); + t.put(key.first, key.second, val); + } + } + t.encode(pending_proposal); + } + const string prefix("paxos"); + auto t = make_shared(); + t->put(prefix, "first_committed", 0); + t->put(prefix, "last_committed", 0); + auto pending_v = 1; + t->put(prefix, pending_v, pending_proposal); + t->put(prefix, "pending_v", pending_v); + t->put(prefix, "pending_pn", 400); + st.apply_transaction(t); + return 0; +} + +// rebuild +// - pgmap_meta/version +// - pgmap_meta/last_osdmap_epoch +// - pgmap_meta/last_pg_scan +// - pgmap_meta/full_ratio +// - pgmap_meta/nearfull_ratio +// - pgmap_meta/stamp +static int update_pgmap_meta(MonitorDBStore& st) +{ + const string prefix("pgmap_meta"); + auto t = make_shared(); + // stolen from PGMonitor::create_pending() + // the first pgmap_meta + t->put(prefix, "version", 1); + { + auto stamp = ceph_clock_now(); + bufferlist bl; + ::encode(stamp, bl); + t->put(prefix, "stamp", bl); + } + { + auto last_osdmap_epoch = st.get("osdmap", "last_committed"); + t->put(prefix, "last_osdmap_epoch", last_osdmap_epoch); + } + // be conservative, so PGMonitor will scan the all pools for pg changes + t->put(prefix, "last_pg_scan", 1); + { + auto full_ratio = g_ceph_context->_conf->mon_osd_full_ratio; + if (full_ratio > 1.0) + full_ratio /= 100.0; + bufferlist bl; + ::encode(full_ratio, bl); + t->put(prefix, "full_ratio", bl); + } + { + auto backfillfull_ratio = g_ceph_context->_conf->mon_osd_backfillfull_ratio; + if (backfillfull_ratio > 1.0) + backfillfull_ratio /= 100.0; + bufferlist bl; + ::encode(backfillfull_ratio, bl); + t->put(prefix, "backfillfull_ratio", bl); + } + { + auto nearfull_ratio = g_ceph_context->_conf->mon_osd_nearfull_ratio; + if (nearfull_ratio > 1.0) + nearfull_ratio /= 100.0; + bufferlist bl; + ::encode(nearfull_ratio, bl); + t->put(prefix, "nearfull_ratio", bl); + } + st.apply_transaction(t); + return 0; +} + +int rebuild_monstore(const char* progname, + vector& subcmds, + MonitorDBStore& st) +{ + po::options_description op_desc("Allowed 'rebuild' options"); + string keyring_path; + op_desc.add_options() + ("keyring", po::value(&keyring_path), + "path to the client.admin key"); + po::variables_map op_vm; + int r = parse_cmd_args(&op_desc, nullptr, nullptr, subcmds, &op_vm); + if (r) { + return -r; + } + if (op_vm.count("help")) { + usage(progname, op_desc); + return 0; + } + if (!keyring_path.empty()) + update_auth(st, keyring_path); + if ((r = update_pgmap_meta(st))) { + return r; + } + if ((r = update_paxos(st))) { + return r; + } + if ((r = update_mkfs(st))) { + return r; + } + if ((r = update_monitor(st))) { + return r; + } + if ((r = update_mgrmap(st))) { + return r; + } + return 0; +} + +int main(int argc, char **argv) { + int err = 0; + po::options_description desc("Allowed options"); + string store_path, cmd; + vector subcmds; + desc.add_options() + ("help,h", "produce help message") + ; + + /* Dear Future Developer: + * + * for further improvement, should you need to pass specific options to + * a command (e.g., get osdmap VER --hex), you can expand the current + * format by creating additional 'po::option_description' and passing + * 'subcmds' to 'po::command_line_parser', much like what is currently + * done by default. However, beware: in order to differentiate a + * command-specific option from the generic/global options, you will need + * to pass '--' in the command line (so that the first parser, the one + * below, assumes it has reached the end of all options); e.g., + * 'get osdmap VER -- --hex'. Not pretty; far from intuitive; it was as + * far as I got with this library. Improvements on this format will be + * left as an excercise for the reader. -Joao + */ + po::options_description positional_desc("Positional argument options"); + positional_desc.add_options() + ("store-path", po::value(&store_path), + "path to monitor's store") + ("command", po::value(&cmd), + "Command") + ("subcmd", po::value >(&subcmds), + "Command arguments/Sub-Commands") + ; + po::positional_options_description positional; + positional.add("store-path", 1); + positional.add("command", 1); + positional.add("subcmd", -1); + + po::options_description all_desc("All options"); + all_desc.add(desc).add(positional_desc); + + vector ceph_option_strings; + po::variables_map vm; + try { + po::parsed_options parsed = + po::command_line_parser(argc, argv). + options(all_desc). + positional(positional). + allow_unregistered().run(); + + po::store( + parsed, + vm); + po::notify(vm); + + // Specifying po::include_positional would have our positional arguments + // being collected (thus being part of ceph_option_strings and eventually + // passed on to global_init() below). + // Instead we specify po::exclude_positional, which has the upside of + // completely avoid this, but the downside of having to specify ceph + // options as --VAR=VAL (note the '='); otherwise we will capture the + // positional 'VAL' as belonging to us, never being collected. + ceph_option_strings = po::collect_unrecognized(parsed.options, + po::exclude_positional); + + } catch(po::error &e) { + std::cerr << "error: " << e.what() << std::endl; + return 1; + } + + // parse command structure before calling global_init() and friends. + + if (vm.empty() || vm.count("help") || + store_path.empty() || cmd.empty() || + *cmd.begin() == '-') { + usage(argv[0], desc); + return 1; + } + + vector ceph_options, def_args; + ceph_options.reserve(ceph_option_strings.size()); + for (vector::iterator i = ceph_option_strings.begin(); + i != ceph_option_strings.end(); + ++i) { + ceph_options.push_back(i->c_str()); + } + + auto cct = global_init( + &def_args, ceph_options, CEPH_ENTITY_TYPE_MON, + CODE_ENVIRONMENT_UTILITY, 0); + common_init_finish(g_ceph_context); + g_ceph_context->_conf->apply_changes(NULL); + g_conf = g_ceph_context->_conf; + + // this is where we'll write *whatever*, on a per-command basis. + // not all commands require some place to write their things. + MonitorDBStore st(store_path); + if (store_path.size()) { + stringstream ss; + int r = st.open(ss); + if (r < 0) { + std::cerr << ss.str() << std::endl; + return EINVAL; + } + } + + if (cmd == "dump-keys") { + KeyValueDB::WholeSpaceIterator iter = st.get_iterator(); + while (iter->valid()) { + pair key(iter->raw_key()); + cout << key.first << " / " << key.second << std::endl; + iter->next(); + } + } else if (cmd == "compact") { + st.compact(); + } else if (cmd == "get") { + unsigned v = 0; + string outpath; + bool readable = false; + string map_type; + // visible options for this command + po::options_description op_desc("Allowed 'get' options"); + op_desc.add_options() + ("help,h", "produce this help message") + ("out,o", po::value(&outpath), + "output file (default: stdout)") + ("version,v", po::value(&v), + "map version to obtain") + ("readable,r", po::value(&readable)->default_value(false), + "print the map infomation in human readable format") + ; + // this is going to be a positional argument; we don't want to show + // it as an option during --help, but we do want to have it captured + // when parsing. + po::options_description hidden_op_desc("Hidden 'get' options"); + hidden_op_desc.add_options() + ("map-type", po::value(&map_type), + "map-type") + ; + po::positional_options_description op_positional; + op_positional.add("map-type", 1); + + po::variables_map op_vm; + int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional, + subcmds, &op_vm); + if (r < 0) { + err = -r; + goto done; + } + + if (op_vm.count("help") || map_type.empty()) { + usage(argv[0], op_desc); + err = 0; + goto done; + } + + if (v == 0) { + if (map_type == "crushmap") { + v = st.get("osdmap", "last_committed"); + } else { + v = st.get(map_type, "last_committed"); + } + } + + int fd = STDOUT_FILENO; + if (!outpath.empty()){ + fd = ::open(outpath.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666); + if (fd < 0) { + std::cerr << "error opening output file: " + << cpp_strerror(errno) << std::endl; + err = EINVAL; + goto done; + } + } + + BOOST_SCOPE_EXIT((&r) (&fd) (&outpath)) { + ::close(fd); + if (r < 0 && fd != STDOUT_FILENO) { + ::remove(outpath.c_str()); + } + } BOOST_SCOPE_EXIT_END + + bufferlist bl; + r = 0; + if (map_type == "osdmap") { + r = st.get(map_type, st.combine_strings("full", v), bl); + } else if (map_type == "crushmap") { + bufferlist tmp; + r = st.get("osdmap", st.combine_strings("full", v), tmp); + if (r >= 0) { + OSDMap osdmap; + osdmap.decode(tmp); + osdmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT); + } + } else { + r = st.get(map_type, v, bl); + } + if (r < 0) { + std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl; + err = EINVAL; + goto done; + } + + if (readable) { + stringstream ss; + bufferlist out; + if (map_type == "monmap") { + MonMap monmap; + monmap.decode(bl); + monmap.print(ss); + } else if (map_type == "osdmap") { + OSDMap osdmap; + osdmap.decode(bl); + osdmap.print(ss); + } else if (map_type == "mdsmap") { + MDSMap mdsmap; + mdsmap.decode(bl); + mdsmap.print(ss); + } else if (map_type == "crushmap") { + CrushWrapper cw; + bufferlist::iterator it = bl.begin(); + cw.decode(it); + CrushCompiler cc(cw, std::cerr, 0); + cc.decompile(ss); + } else { + std::cerr << "This type of readable map does not exist: " << map_type << std::endl + << "You can only specify[osdmap|monmap|mdsmap|crushmap]" << std::endl; + } + out.append(ss); + out.write_fd(fd); + } else { + bl.write_fd(fd); + } + + if (!outpath.empty()) { + std::cout << "wrote " << map_type + << " version " << v << " to " << outpath + << std::endl; + } + } else if (cmd == "show-versions") { + string map_type; //map type:osdmap,monmap... + // visible options for this command + po::options_description op_desc("Allowed 'show-versions' options"); + op_desc.add_options() + ("help,h", "produce this help message") + ("map-type", po::value(&map_type), "map_type"); + + po::positional_options_description op_positional; + op_positional.add("map-type", 1); + + po::variables_map op_vm; + int r = parse_cmd_args(&op_desc, NULL, &op_positional, + subcmds, &op_vm); + if (r < 0) { + err = -r; + goto done; + } + + if (op_vm.count("help") || map_type.empty()) { + usage(argv[0], op_desc); + err = 0; + goto done; + } + + unsigned int v_first = 0; + unsigned int v_last = 0; + v_first = st.get(map_type, "first_committed"); + v_last = st.get(map_type, "last_committed"); + + std::cout << "first committed:\t" << v_first << "\n" + << "last committed:\t" << v_last << std::endl; + } else if (cmd == "dump-paxos") { + unsigned dstart = 0; + unsigned dstop = ~0; + po::options_description op_desc("Allowed 'dump-paxos' options"); + op_desc.add_options() + ("help,h", "produce this help message") + ("start,s", po::value(&dstart), + "starting version (default: 0)") + ("end,e", po::value(&dstop), + "finish version (default: ~0)") + ; + + po::variables_map op_vm; + int r = parse_cmd_args(&op_desc, NULL, NULL, + subcmds, &op_vm); + if (r < 0) { + err = -r; + goto done; + } + + if (op_vm.count("help")) { + usage(argv[0], op_desc); + err = 0; + goto done; + } + + if (dstart > dstop) { + std::cerr << "error: 'start' version (value: " << dstart << ") " + << " is greater than 'end' version (value: " << dstop << ")" + << std::endl; + err = EINVAL; + goto done; + } + + version_t v = dstart; + for (; v <= dstop; ++v) { + bufferlist bl; + st.get("paxos", v, bl); + if (bl.length() == 0) + break; + cout << "\n--- " << v << " ---" << std::endl; + auto tx(std::make_shared()); + Paxos::decode_append_transaction(tx, bl); + JSONFormatter f(true); + tx->dump(&f); + f.flush(cout); + } + + std::cout << "dumped " << v << " paxos versions" << std::endl; + + } else if (cmd == "dump-trace") { + unsigned dstart = 0; + unsigned dstop = ~0; + string outpath; + + // visible options for this command + po::options_description op_desc("Allowed 'dump-trace' options"); + op_desc.add_options() + ("help,h", "produce this help message") + ("start,s", po::value(&dstart), + "starting version (default: 0)") + ("end,e", po::value(&dstop), + "finish version (default: ~0)") + ; + // this is going to be a positional argument; we don't want to show + // it as an option during --help, but we do want to have it captured + // when parsing. + po::options_description hidden_op_desc("Hidden 'dump-trace' options"); + hidden_op_desc.add_options() + ("out,o", po::value(&outpath), + "file to write the dump to") + ; + po::positional_options_description op_positional; + op_positional.add("out", 1); + + po::variables_map op_vm; + int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional, + subcmds, &op_vm); + if (r < 0) { + err = -r; + goto done; + } + + if (op_vm.count("help")) { + usage(argv[0], op_desc); + err = 0; + goto done; + } + + if (outpath.empty()) { + usage(argv[0], op_desc); + err = EINVAL; + goto done; + } + + if (dstart > dstop) { + std::cerr << "error: 'start' version (value: " << dstart << ") " + << " is greater than 'stop' version (value: " << dstop << ")" + << std::endl; + err = EINVAL; + goto done; + } + + TraceIter iter(outpath.c_str()); + iter.init(); + while (true) { + if (!iter.valid()) + break; + if (iter.num() >= dstop) { + break; + } + if (iter.num() >= dstart) { + JSONFormatter f(true); + iter.cur()->dump(&f, false); + f.flush(std::cout); + std::cout << std::endl; + } + iter.next(); + } + std::cerr << "Read up to transaction " << iter.num() << std::endl; + } else if (cmd == "replay-trace") { + string inpath; + unsigned num_replays = 1; + // visible options for this command + po::options_description op_desc("Allowed 'replay-trace' options"); + op_desc.add_options() + ("help,h", "produce this help message") + ("num-replays,n", po::value(&num_replays), + "finish version (default: 1)") + ; + // this is going to be a positional argument; we don't want to show + // it as an option during --help, but we do want to have it captured + // when parsing. + po::options_description hidden_op_desc("Hidden 'replay-trace' options"); + hidden_op_desc.add_options() + ("in,i", po::value(&inpath), + "file to write the dump to") + ; + po::positional_options_description op_positional; + op_positional.add("in", 1); + + // op_desc_all will aggregate all visible and hidden options for parsing. + // when we call 'usage()' we just pass 'op_desc', as that's the description + // holding the visible options. + po::options_description op_desc_all; + op_desc_all.add(op_desc).add(hidden_op_desc); + + po::variables_map op_vm; + try { + po::parsed_options op_parsed = po::command_line_parser(subcmds). + options(op_desc_all).positional(op_positional).run(); + po::store(op_parsed, op_vm); + po::notify(op_vm); + } catch (po::error &e) { + std::cerr << "error: " << e.what() << std::endl; + err = EINVAL; + goto done; + } + + if (op_vm.count("help")) { + usage(argv[0], op_desc); + err = 0; + goto done; + } + + if (inpath.empty()) { + usage(argv[0], op_desc); + err = EINVAL; + goto done; + } + + unsigned num = 0; + for (unsigned i = 0; i < num_replays; ++i) { + TraceIter iter(inpath.c_str()); + iter.init(); + while (true) { + if (!iter.valid()) + break; + std::cerr << "Replaying trans num " << num << std::endl; + st.apply_transaction(iter.cur()); + iter.next(); + ++num; + } + std::cerr << "Read up to transaction " << iter.num() << std::endl; + } + } else if (cmd == "random-gen") { + unsigned tsize = 200; + unsigned tvalsize = 1024; + unsigned ntrans = 100; + po::options_description op_desc("Allowed 'random-gen' options"); + op_desc.add_options() + ("help,h", "produce this help message") + ("num-keys,k", po::value(&tsize), + "keys to write in each transaction (default: 200)") + ("size,s", po::value(&tvalsize), + "size (in bytes) of the value to write in each key (default: 1024)") + ("ntrans,n", po::value(&ntrans), + "number of transactions to run (default: 100)") + ; + + po::variables_map op_vm; + try { + po::parsed_options op_parsed = po::command_line_parser(subcmds). + options(op_desc).run(); + po::store(op_parsed, op_vm); + po::notify(op_vm); + } catch (po::error &e) { + std::cerr << "error: " << e.what() << std::endl; + err = EINVAL; + goto done; + } + + if (op_vm.count("help")) { + usage(argv[0], op_desc); + err = 0; + goto done; + } + + unsigned num = 0; + for (unsigned i = 0; i < ntrans; ++i) { + std::cerr << "Applying trans " << i << std::endl; + auto t(std::make_shared()); + string prefix; + prefix.push_back((i%26)+'a'); + for (unsigned j = 0; j < tsize; ++j) { + stringstream os; + os << num; + bufferlist bl; + for (unsigned k = 0; k < tvalsize; ++k) bl.append(rand()); + t->put(prefix, os.str(), bl); + ++num; + } + t->compact_prefix(prefix); + st.apply_transaction(t); + } + } else if (cmd == "store-copy") { + if (subcmds.size() < 1 || subcmds[0].empty()) { + usage(argv[0], desc); + err = EINVAL; + goto done; + } + + string out_path = subcmds[0]; + + MonitorDBStore out_store(out_path); + { + stringstream ss; + int r = out_store.create_and_open(ss); + if (r < 0) { + std::cerr << ss.str() << std::endl; + goto done; + } + } + + + KeyValueDB::WholeSpaceIterator it = st.get_iterator(); + uint64_t total_keys = 0; + uint64_t total_size = 0; + uint64_t total_tx = 0; + + do { + uint64_t num_keys = 0; + + auto tx(std::make_shared()); + + while (it->valid() && num_keys < 128) { + pair k = it->raw_key(); + bufferlist v = it->value(); + tx->put(k.first, k.second, v); + + num_keys ++; + total_tx ++; + total_size += v.length(); + + it->next(); + } + + total_keys += num_keys; + + if (!tx->empty()) + out_store.apply_transaction(tx); + + std::cout << "copied " << total_keys << " keys so far (" + << stringify(si_t(total_size)) << ")" << std::endl; + + } while (it->valid()); + out_store.close(); + std::cout << "summary: copied " << total_keys << " keys, using " + << total_tx << " transactions, totalling " + << stringify(si_t(total_size)) << std::endl; + std::cout << "from '" << store_path << "' to '" << out_path << "'" + << std::endl; + } else if (cmd == "rewrite-crush") { + err = rewrite_crush(argv[0], subcmds, st); + } else if (cmd == "inflate-pgmap") { + unsigned n = 2000; + bool can_be_trimmed = false; + po::options_description op_desc("Allowed 'inflate-pgmap' options"); + op_desc.add_options() + ("num-maps,n", po::value(&n), + "number of maps to add (default: 2000)") + ("can-be-trimmed", po::value(&can_be_trimmed), + "can be trimmed (default: false)") + ; + + po::variables_map op_vm; + try { + po::parsed_options op_parsed = po::command_line_parser(subcmds). + options(op_desc).run(); + po::store(op_parsed, op_vm); + po::notify(op_vm); + } catch (po::error &e) { + std::cerr << "error: " << e.what() << std::endl; + err = EINVAL; + goto done; + } + err = inflate_pgmap(st, n, can_be_trimmed); + } else if (cmd == "rebuild") { + err = rebuild_monstore(argv[0], subcmds, st); + } else { + std::cerr << "Unrecognized command: " << cmd << std::endl; + usage(argv[0], desc); + goto done; + } + + done: + st.close(); + return err; +}