#include "MonMap.h"

// NOTE(review): the text this file was recovered from had lost everything
// written between angle brackets: the names of the four system headers
// below, every template argument list, and part of build_from_host_list().
// Those pieces were restored from how the surrounding code uses them;
// please confirm them against MonMap.h and the build.
#include <algorithm>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

#include "common/Formatter.h"

#include "include/ceph_features.h"
#include "include/addr_parsing.h"
#include "common/ceph_argparse.h"
#include "common/dns_resolve.h"
#include "common/errno.h"

#include "common/dout.h"

using ceph::Formatter;

/**
 * Encode this monitor's name, public address and priority (struct v2,
 * compat v1). The address encoding depends on @p features.
 */
void mon_info_t::encode(bufferlist& bl, uint64_t features) const
{
  ENCODE_START(2, 1, bl);
  ::encode(name, bl);
  ::encode(public_addr, bl, features);
  ::encode(priority, bl);
  ENCODE_FINISH(bl);
}

/**
 * Decode a mon_info_t. 'priority' is only present from struct v2 onwards;
 * when decoding an older encoding it is left untouched.
 */
void mon_info_t::decode(bufferlist::iterator& p)
{
  DECODE_START(1, p);
  ::decode(name, p);
  ::decode(public_addr, p);
  if (struct_v >= 2) {
    ::decode(priority, p);
  }
  DECODE_FINISH(p);
}

/// Write a one-line, human readable description of this monitor to @p out.
void mon_info_t::print(ostream& out) const
{
  out << "mon." << name
      << " public " << public_addr
      << " priority " << priority;
}

/**
 * Reconcile 'mon_info' with the legacy name -> address map @p o.
 *
 * If 'mon_info' is already populated, @p o is only used to assert that both
 * agree; otherwise 'mon_info' is populated from @p o.
 */
void MonMap::sanitize_mons(map<string,entity_addr_t>& o)
{
  // if mon_info is populated, it means we decoded a map encoded
  // by someone who understands the new format (i.e., is able to
  // encode 'mon_info'). This means they must also have provided
  // a properly populated 'mon_addr' (which we have dropped with
  // this patch), 'o' being the contents of said map. In this
  // case, 'o' must have the same number of entries as 'mon_info'.
  //
  // Also, for each entry in 'o', there has to be a matching
  // 'mon_info' entry, properly populated with a name and a matching
  // 'public_addr'.
  //
  // OTOH, if 'mon_info' is not populated, it means the one that
  // originally encoded the map does not know the new format, and
  // 'o' will be our only source of info about the monitors in the
  // cluster -- and we will use it to populate our 'mon_info' map.

  const bool has_mon_info = !mon_info.empty();
  if (has_mon_info) {
    assert(o.size() == mon_info.size());
  }

  // iterate by reference: copying every (name, addr) pair is pointless
  for (const auto& p : o) {
    if (has_mon_info) {
      // make sure the info we have is accurate
      const auto info = mon_info.find(p.first);
      assert(info != mon_info.end());
      assert(info->second.name == p.first);
      assert(info->second.public_addr == p.second);
    } else {
      mon_info_t &m = mon_info[p.first];
      m.name = p.first;
      m.public_addr = p.second;
    }
  }
}

namespace {
  /// Orders monitors by public address, breaking ties by name.
  struct rank_cmp {
    bool operator()(const mon_info_t &a, const mon_info_t &b) const {
      if (a.public_addr == b.public_addr)
        return a.name < b.name;
      return a.public_addr < b.public_addr;
    }
  };
}

/**
 * Rebuild 'ranks' (monitor names ordered by rank_cmp) and 'addr_mons'
 * (public address -> name) from 'mon_info'.
 *
 * Asserts that no two monitors share a public address.
 */
void MonMap::calc_ranks() {

  ranks.resize(mon_info.size());
  addr_mons.clear();

  // Used to order entries according to public_addr, because that's
  // how the ranks are expected to be ordered by. We may expand this
  // later on, according to some other criteria, by specifying a
  // different comparator.
  //
  // Please note that we use a 'set' here instead of resorting to
  // std::sort() because we need more info than that's available in
  // the vector. The vector will thus be ordered by, e.g., public_addr
  // while only containing the names of each individual monitor.
  // The only way of achieving this with std::sort() would be to first
  // insert every mon_info_t entry into a vector 'foo', std::sort() 'foo'
  // with custom comparison functions, and then copy each individual entry
  // to a new vector. Unless there's a simpler way, we don't think the
  // added complexity makes up for the additional memory usage of a 'set'.
  set<mon_info_t, rank_cmp> tmp;

  for (const auto& p : mon_info) {
    const mon_info_t &m = p.second;
    tmp.insert(m);

    // populate addr_mons
    assert(addr_mons.count(m.public_addr) == 0);
    addr_mons[m.public_addr] = m.name;
  }

  // map the set to the actual ranks etc
  unsigned i = 0;
  for (const auto& m : tmp) {
    ranks[i++] = m.name;
  }
}

/**
 * Encode the monmap into @p blist in the newest format the peer described
 * by @p con_features can decode:
 *   - no CEPH_FEATURE_MONNAMES: legacy v1 (vector of mon instances);
 *   - no CEPH_FEATURE_MONENC:   legacy v2 (name -> address map);
 *   - otherwise:                versioned encoding v5 (compat v3).
 *
 * Exactly one encoding is written.
 */
void MonMap::encode(bufferlist& blist, uint64_t con_features) const
{
  /* we keep the mon_addr map when encoding to ensure compatibility
   * with clients and other monitors that do not yet support the 'mons'
   * map. This map keeps its original behavior, containing a mapping of
   * monitor id (i.e., 'foo' in 'mon.foo') to the monitor's public
   * address -- which is obtained from the public address of each entry
   * in the 'mons' map.
   */
  map<string,entity_addr_t> mon_addr;
  for (const auto& p : mon_info) {
    mon_addr[p.first] = p.second.public_addr;
  }

  if ((con_features & CEPH_FEATURE_MONNAMES) == 0) {
    __u16 v = 1;
    ::encode(v, blist);
    ::encode_raw(fsid, blist);
    ::encode(epoch, blist);
    vector<entity_inst_t> mon_inst(mon_addr.size());
    for (unsigned n = 0; n < mon_addr.size(); n++)
      mon_inst[n] = get_inst(n);
    ::encode(mon_inst, blist, con_features);
    ::encode(last_changed, blist);
    ::encode(created, blist);
    return;
  }

  if ((con_features & CEPH_FEATURE_MONENC) == 0) {
    __u16 v = 2;
    ::encode(v, blist);
    ::encode_raw(fsid, blist);
    ::encode(epoch, blist);
    ::encode(mon_addr, blist, con_features);
    ::encode(last_changed, blist);
    ::encode(created, blist);
    // the v2 encoding is complete; without this return the v5 encoding
    // below would be appended to the same buffer as trailing data.
    return;
  }

  ENCODE_START(5, 3, blist);
  ::encode_raw(fsid, blist);
  ::encode(epoch, blist);
  ::encode(mon_addr, blist, con_features);
  ::encode(last_changed, blist);
  ::encode(created, blist);
  ::encode(persistent_features, blist);
  ::encode(optional_features, blist);
  // this supersedes 'mon_addr'
  ::encode(mon_info, blist, con_features);
  ENCODE_FINISH(blist);
}

/**
 * Decode a monmap in any of the formats produced by encode(), then
 * reconcile 'mon_info' with the legacy address map and recompute ranks.
 */
void MonMap::decode(bufferlist::iterator &p)
{
  map<string,entity_addr_t> mon_addr;
  DECODE_START_LEGACY_COMPAT_LEN_16(5, 3, 3, p);
  ::decode_raw(fsid, p);
  ::decode(epoch, p);
  if (struct_v == 1) {
    // v1 carries no monitor names: synthesize a one-character name from
    // each instance's position ('0', '1', ...).
    vector<entity_inst_t> mon_inst;
    ::decode(mon_inst, p);
    for (unsigned i = 0; i < mon_inst.size(); i++) {
      char n[2];
      n[0] = '0' + i;
      n[1] = 0;
      string name = n;
      mon_addr[name] = mon_inst[i].addr;
    }
  } else {
    ::decode(mon_addr, p);
  }
  ::decode(last_changed, p);
  ::decode(created, p);
  if (struct_v >= 4) {
    ::decode(persistent_features, p);
    ::decode(optional_features, p);
  }
  if (struct_v >= 5) {
    ::decode(mon_info, p);
  } else {
    // we may be decoding to an existing monmap; if we do not
    // clear the mon_info map now, we will likely incur in problems
    // later on MonMap::sanitize_mons()
    mon_info.clear();
  }
  DECODE_FINISH(p);
  sanitize_mons(mon_addr);
  calc_ranks();
}

/**
 * Append sample MonMap instances to @p o: an empty map, a map with a single
 * monitor, and a map mixing empty and populated addresses. The instances
 * are allocated with new and not freed here.
 */
void MonMap::generate_test_instances(list<MonMap*>& o)
{
  o.push_back(new MonMap);
  o.push_back(new MonMap);
  o.back()->epoch = 1;
  o.back()->last_changed = utime_t(123, 456);
  o.back()->created = utime_t(789, 101112);
  o.back()->add("one", entity_addr_t());

  MonMap *m = new MonMap;
  {
    m->epoch = 1;
    m->last_changed = utime_t(123, 456);

    entity_addr_t empty_addr_one;
    empty_addr_one.set_nonce(1);
    m->add("empty_addr_one", empty_addr_one);
    entity_addr_t empty_addr_two;
    empty_addr_two.set_nonce(2);
    m->add("empty_adrr_two", empty_addr_two);

    const char *local_pub_addr_s = "127.0.1.2";

    const char *end_p = local_pub_addr_s + strlen(local_pub_addr_s);
    entity_addr_t local_pub_addr;
    local_pub_addr.parse(local_pub_addr_s, &end_p);

    m->add(mon_info_t("filled_pub_addr", local_pub_addr, 1));

    m->add("empty_addr_zero", entity_addr_t());
  }
  o.push_back(m);
}

// read from/write to a file

/// Encode the map with all features enabled and write it to file @p fn.
/// Returns the result of bufferlist::write_file().
int MonMap::write(const char *fn)
{
  // encode
  bufferlist bl;
  encode(bl, CEPH_FEATURES_ALL);

  return bl.write_file(fn);
}

/// Read file @p fn and decode it into this map. Returns the negative value
/// from bufferlist::read_file() on read failure, 0 otherwise. build_initial()
/// expects decoding to be able to throw buffer::error.
int MonMap::read(const char *fn)
{
  // read
  bufferlist bl;
  std::string error;
  int r = bl.read_file(fn, &error);
  if (r < 0)
    return r;
  decode(bl);
  return 0;
}

/// Print "e<epoch>: <N> mons at {name=addr,...}" to @p out.
void MonMap::print_summary(ostream& out) const
{
  out << "e" << epoch << ": "
      << mon_info.size() << " mons at {";
  // the map that we used to print, as it was, no longer
  // maps strings to the monitor's public address, but to
  // mon_info_t instead. As such, print the map in a way
  // that keeps the expected format.
  bool has_printed = false;
  for (const auto& p : mon_info) {
    if (has_printed)
      out << ",";
    out << p.first << "=" << p.second.public_addr;
    has_printed = true;
  }
  out << "}";
}

/// Print the map header followed by one "<rank>: <addr> mon.<name>" line
/// per monitor, in rank order.
void MonMap::print(ostream& out) const
{
  out << "epoch " << epoch << "\n";
  out << "fsid " << fsid << "\n";
  out << "last_changed " << last_changed << "\n";
  out << "created " << created << "\n";
  unsigned i = 0;
  for (const auto& name : ranks) {
    out << i++ << ": " << get_addr(name) << " mon." << name << "\n";
  }
}

/// Dump the map (epoch, fsid, timestamps, features and the monitors in rank
/// order) through formatter @p f.
void MonMap::dump(Formatter *f) const
{
  f->dump_unsigned("epoch", epoch);
  f->dump_stream("fsid") <<  fsid;
  f->dump_stream("modified") << last_changed;
  f->dump_stream("created") << created;
  f->open_object_section("features");
  persistent_features.dump(f, "persistent");
  optional_features.dump(f, "optional");
  f->close_section();
  f->open_array_section("mons");
  int i = 0;
  for (const auto& name : ranks) {
    f->open_object_section("mon");
    f->dump_int("rank", i);
    f->dump_string("name", name);
    // both keys carry the same value
    f->dump_stream("addr") << get_addr(name);
    f->dump_stream("public_addr") << get_addr(name);
    f->close_section();
    ++i;
  }
  f->close_section();
}

/**
 * Add one monitor per address found in @p hostlist, naming each one
 * @p prefix followed by a letter.
 *
 * NOTE(review): everything in this function after the first
 * "for (unsigned i=0; i" was missing from the text this file was recovered
 * from. The loop body, the DNS fallback and the return codes below were
 * reconstructed and MUST be checked against the project history before
 * this is merged.
 */
int MonMap::build_from_host_list(std::string hostlist, std::string prefix)
{
  vector<entity_addr_t> addrs;
  if (parse_ip_port_vec(hostlist.c_str(), addrs)) {
    if (addrs.empty())
      return -ENOENT;
    for (unsigned i=0; i<addrs.size(); i++) {
      char n[2];
      n[0] = 'a' + i;
      n[1] = 0;
      if (addrs[i].get_port() == 0)
        addrs[i].set_port(CEPH_MON_PORT);
      string name = prefix;
      name += n;
      if (!contains(addrs[i]))
        add(name, addrs[i]);
    }
    return 0;
  }

  // maybe they passed us a DNS-resolvable name
  char *hosts = resolve_addrs(hostlist.c_str());
  if (!hosts)
    return -EINVAL;
  bool success = parse_ip_port_vec(hosts, addrs);
  free(hosts);
  if (!success)
    return -EINVAL;

  if (addrs.empty())
    return -ENOENT;

  for (unsigned i=0; i<addrs.size(); i++) {
    char n[2];
    n[0] = 'a' + i;
    n[1] = 0;
    if (addrs[i].get_port() == 0)
      addrs[i].set_port(CEPH_MON_PORT);
    string name = prefix;
    name += n;
    if (!contains(addrs[i]) &&
        !contains(name))
      add(name, addrs[i]);
  }
  return 0;
}

/**
 * Make the map contain exactly the monitors named in @p initial_members.
 *
 * Monitors not in the list are removed (their addresses are recorded in
 * @p removed when it is non-null). Listed monitors missing from the map are
 * added: @p my_name gets @p my_addr, any other name gets a placeholder
 * address distinguished only by its nonce.
 *
 * NOTE(review): the start of this signature (return type, 'cct' and the
 * element types of 'initial_members' / 'removed') was missing from the
 * recovered text and is restored from the names the body uses -- confirm
 * against MonMap.h.
 */
void MonMap::set_initial_members(CephContext *cct,
                                 list<std::string>& initial_members,
                                 string my_name, const entity_addr_t& my_addr,
                                 set<entity_addr_t> *removed)
{
  // remove non-initial members
  unsigned i = 0;
  while (i < size()) {
    string n = get_name(i);
    if (std::find(initial_members.begin(), initial_members.end(), n)
        != initial_members.end()) {
      lgeneric_dout(cct, 1) << " keeping " << n << " " << get_addr(i) << dendl;
      i++;
      continue;
    }

    // 'i' is deliberately not advanced after a removal
    lgeneric_dout(cct, 1) << " removing " << get_name(i) << " " << get_addr(i) << dendl;
    if (removed)
      removed->insert(get_addr(i));
    remove(n);
    assert(!contains(n));
  }

  // add missing initial members
  for (const auto& member : initial_members) {
    if (contains(member))
      continue;

    if (member == my_name) {
      lgeneric_dout(cct, 1) << " adding self " << member << " " << my_addr << dendl;
      add(member, my_addr);
    } else {
      // placeholder address: pick the first nonce that is not yet in use
      entity_addr_t a;
      a.set_type(entity_addr_t::TYPE_LEGACY);
      a.set_family(AF_INET);
      for (int n=1; ; n++) {
        a.set_nonce(n);
        if (!contains(a))
          break;
      }
      lgeneric_dout(cct, 1) << " adding " << member << " " << a << dendl;
      add(member, a);
    }
    assert(contains(member));
  }
}

/**
 * Build the initial monmap from, in order of precedence: the 'monmap' file,
 * the 'mon_host' option, the [mon.X] sections of the configuration file,
 * and finally DNS SRV records.
 *
 * Problems are described on @p errout. Returns 0 on success and a negative
 * errno-style value otherwise (-ENOENT when no monitor could be found).
 */
int MonMap::build_initial(CephContext *cct, ostream& errout)
{
  const md_config_t *conf = cct->_conf;
  // file?
  const auto monmap = conf->get_val<std::string>("monmap");
  if (!monmap.empty()) {
    int r;
    try {
      r = read(monmap.c_str());
    }
    catch (const buffer::error &e) {
      r = -EINVAL;
    }
    if (r >= 0)
      return 0;
    errout << "unable to read/decode monmap from " << monmap
           << ": " << cpp_strerror(-r) << std::endl;
    return r;
  }

  // fsid from conf?
  const auto new_fsid = conf->get_val<uuid_d>("fsid");
  if (!new_fsid.is_zero()) {
    fsid = new_fsid;
  }

  // -m foo?
  const auto mon_host = conf->get_val<std::string>("mon_host");
  if (!mon_host.empty()) {
    int r = build_from_host_list(mon_host, "noname-");
    if (r < 0) {
      errout << "unable to parse addrs in '" << mon_host << "'"
             << std::endl;
      return r;
    }
    created = ceph_clock_now();
    last_changed = created;
    return 0;
  }

  // What monitors are in the config file?
  std::vector<std::string> sections;
  int ret = conf->get_all_sections(sections);
  if (ret) {
    errout << "Unable to find any monitors in the configuration "
           << "file, because there was an error listing the sections. error "
           << ret << std::endl;
    return -ENOENT;
  }
  std::vector<std::string> mon_names;
  for (const auto& s : sections) {
    if ((s.substr(0, 4) == "mon.") && (s.size() > 4)) {
      mon_names.push_back(s.substr(4));
    }
  }

  // Find an address for each monitor in the config file.
  for (const auto& m : mon_names) {
    // sections to search, most specific first
    std::vector<std::string> lookup_sections;
    std::string m_name("mon");
    m_name += ".";
    m_name += m;
    lookup_sections.push_back(m_name);
    lookup_sections.push_back("mon");
    lookup_sections.push_back("global");
    std::string val;
    int res = conf->get_val_from_conf_file(lookup_sections, "mon addr", val, true);
    if (res) {
      errout << "failed to get an address for mon." << m << ": error "
             << res << std::endl;
      continue;
    }
    entity_addr_t addr;
    if (!addr.parse(val.c_str())) {
      errout << "unable to parse address for mon." << m
             << ": addr='" << val << "'" << std::endl;
      continue;
    }
    if (addr.get_port() == 0)
      addr.set_port(CEPH_MON_PORT);

    uint16_t priority = 0;
    if (!conf->get_val_from_conf_file(lookup_sections, "mon priority", val, false)) {
      try {
        priority = std::stoul(val);
      } catch (std::logic_error&) {
        errout << "unable to parse priority for mon." << m
               << ": priority='" << val << "'" << std::endl;
        continue;
      }
    }
    // make sure this mon isn't already in the map
    if (contains(addr))
      remove(get_name(addr));
    if (contains(m))
      remove(m);

    add(mon_info_t{m, addr, priority});
  }

  if (size() == 0) {
    // no info found from conf options lets try use DNS SRV records
    string srv_name = conf->get_val<std::string>("mon_dns_srv_name");
    string domain;
    // check if domain is also provided and extract it from srv_name
    size_t idx = srv_name.find("_");
    if (idx != string::npos) {
      domain = srv_name.substr(idx + 1);
      srv_name = srv_name.substr(0, idx);
    }

    map<string, DNSResolver::Record> records;
    if (DNSResolver::get_instance()->resolve_srv_hosts(cct, srv_name,
        DNSResolver::SRV_Protocol::TCP, domain, &records) != 0) {

      // report the service name that was actually queried, not a fixed one
      errout << "unable to get monitor info from DNS SRV with service name: "
             << srv_name << std::endl;
    }
    else {
      for (const auto& record : records) {
        add(mon_info_t{record.first, record.second.addr, record.second.priority});
      }
    }
  }

  if (size() == 0) {
    errout << "no monitors specified to connect to." << std::endl;
    return -ENOENT;
  }
  created = ceph_clock_now();
  last_changed = created;
  return 0;
}