X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Fmon%2FMonMap.cc;fp=src%2Fceph%2Fsrc%2Fmon%2FMonMap.cc;h=7a1b9420e77fdcdb8a2397f1d851cf20ce0145fd;hb=812ff6ca9fcd3e629e49d4328905f33eee8ca3f5;hp=0000000000000000000000000000000000000000;hpb=15280273faafb77777eab341909a3f495cf248d9;p=stor4nfv.git diff --git a/src/ceph/src/mon/MonMap.cc b/src/ceph/src/mon/MonMap.cc new file mode 100644 index 0000000..7a1b942 --- /dev/null +++ b/src/ceph/src/mon/MonMap.cc @@ -0,0 +1,574 @@ + +#include "MonMap.h" + +#include +#include +#include +#include + +#include "common/Formatter.h" + +#include "include/ceph_features.h" +#include "include/addr_parsing.h" +#include "common/ceph_argparse.h" +#include "common/dns_resolve.h" +#include "common/errno.h" + +#include "common/dout.h" + +using ceph::Formatter; + +void mon_info_t::encode(bufferlist& bl, uint64_t features) const +{ + ENCODE_START(2, 1, bl); + ::encode(name, bl); + ::encode(public_addr, bl, features); + ::encode(priority, bl); + ENCODE_FINISH(bl); +} + +void mon_info_t::decode(bufferlist::iterator& p) +{ + DECODE_START(1, p); + ::decode(name, p); + ::decode(public_addr, p); + if (struct_v >= 2) { + ::decode(priority, p); + } + DECODE_FINISH(p); +} + +void mon_info_t::print(ostream& out) const +{ + out << "mon." << name + << " public " << public_addr + << " priority " << priority; +} + +void MonMap::sanitize_mons(map& o) +{ + // if mon_info is populated, it means we decoded a map encoded + // by someone who understands the new format (i.e., is able to + // encode 'mon_info'). This means they must also have provided + // a properly populated 'mon_addr' (which we have dropped with + // this patch), 'o' being the contents of said map. In this + // case, 'o' must have the same number of entries as 'mon_info'. + // + // Also, for each entry in 'o', there has to be a matching + // 'mon_info' entry, properly populated with a name and a matching + // 'public_addr'. + // + // OTOH, if 'mon_info' is not populated, it means the one that + // originally encoded the map does not know the new format, and + // 'o' will be our only source of info about the monitors in the + // cluster -- and we will use it to populate our 'mon_info' map. + + bool has_mon_info = false; + if (mon_info.size() > 0) { + assert(o.size() == mon_info.size()); + has_mon_info = true; + } + + for (auto p : o) { + if (has_mon_info) { + // make sure the info we have is accurate + assert(mon_info.count(p.first)); + assert(mon_info[p.first].name == p.first); + assert(mon_info[p.first].public_addr == p.second); + } else { + mon_info_t &m = mon_info[p.first]; + m.name = p.first; + m.public_addr = p.second; + } + } +} + +namespace { + struct rank_cmp { + bool operator()(const mon_info_t &a, const mon_info_t &b) const { + if (a.public_addr == b.public_addr) + return a.name < b.name; + return a.public_addr < b.public_addr; + } + }; +} + +void MonMap::calc_ranks() { + + ranks.resize(mon_info.size()); + addr_mons.clear(); + + // Used to order entries according to public_addr, because that's + // how the ranks are expected to be ordered by. We may expand this + // later on, according to some other criteria, by specifying a + // different comparator. + // + // Please note that we use a 'set' here instead of resorting to + // std::sort() because we need more info than that's available in + // the vector. The vector will thus be ordered by, e.g., public_addr + // while only containing the names of each individual monitor. + // The only way of achieving this with std::sort() would be to first + // insert every mon_info_t entry into a vector 'foo', std::sort() 'foo' + // with custom comparison functions, and then copy each invidual entry + // to a new vector. Unless there's a simpler way, we don't think the + // added complexity makes up for the additional memory usage of a 'set'. + set tmp; + + for (map::iterator p = mon_info.begin(); + p != mon_info.end(); + ++p) { + mon_info_t &m = p->second; + tmp.insert(m); + + // populate addr_mons + assert(addr_mons.count(m.public_addr) == 0); + addr_mons[m.public_addr] = m.name; + } + + // map the set to the actual ranks etc + unsigned i = 0; + for (set::iterator p = tmp.begin(); + p != tmp.end(); + ++p, ++i) { + ranks[i] = p->name; + } +} + +void MonMap::encode(bufferlist& blist, uint64_t con_features) const +{ + /* we keep the mon_addr map when encoding to ensure compatibility + * with clients and other monitors that do not yet support the 'mons' + * map. This map keeps its original behavior, containing a mapping of + * monitor id (i.e., 'foo' in 'mon.foo') to the monitor's public + * address -- which is obtained from the public address of each entry + * in the 'mons' map. + */ + map mon_addr; + for (map::const_iterator p = mon_info.begin(); + p != mon_info.end(); + ++p) { + mon_addr[p->first] = p->second.public_addr; + } + + if ((con_features & CEPH_FEATURE_MONNAMES) == 0) { + __u16 v = 1; + ::encode(v, blist); + ::encode_raw(fsid, blist); + ::encode(epoch, blist); + vector mon_inst(mon_addr.size()); + for (unsigned n = 0; n < mon_addr.size(); n++) + mon_inst[n] = get_inst(n); + ::encode(mon_inst, blist, con_features); + ::encode(last_changed, blist); + ::encode(created, blist); + return; + } + + if ((con_features & CEPH_FEATURE_MONENC) == 0) { + __u16 v = 2; + ::encode(v, blist); + ::encode_raw(fsid, blist); + ::encode(epoch, blist); + ::encode(mon_addr, blist, con_features); + ::encode(last_changed, blist); + ::encode(created, blist); + } + + ENCODE_START(5, 3, blist); + ::encode_raw(fsid, blist); + ::encode(epoch, blist); + ::encode(mon_addr, blist, con_features); + ::encode(last_changed, blist); + ::encode(created, blist); + ::encode(persistent_features, blist); + ::encode(optional_features, blist); + // this superseeds 'mon_addr' + ::encode(mon_info, blist, con_features); + ENCODE_FINISH(blist); +} + +void MonMap::decode(bufferlist::iterator &p) +{ + map mon_addr; + DECODE_START_LEGACY_COMPAT_LEN_16(5, 3, 3, p); + ::decode_raw(fsid, p); + ::decode(epoch, p); + if (struct_v == 1) { + vector mon_inst; + ::decode(mon_inst, p); + for (unsigned i = 0; i < mon_inst.size(); i++) { + char n[2]; + n[0] = '0' + i; + n[1] = 0; + string name = n; + mon_addr[name] = mon_inst[i].addr; + } + } else { + ::decode(mon_addr, p); + } + ::decode(last_changed, p); + ::decode(created, p); + if (struct_v >= 4) { + ::decode(persistent_features, p); + ::decode(optional_features, p); + } + if (struct_v >= 5) { + ::decode(mon_info, p); + } else { + // we may be decoding to an existing monmap; if we do not + // clear the mon_info map now, we will likely incur in problems + // later on MonMap::sanitize_mons() + mon_info.clear(); + } + DECODE_FINISH(p); + sanitize_mons(mon_addr); + calc_ranks(); +} + +void MonMap::generate_test_instances(list& o) +{ + o.push_back(new MonMap); + o.push_back(new MonMap); + o.back()->epoch = 1; + o.back()->last_changed = utime_t(123, 456); + o.back()->created = utime_t(789, 101112); + o.back()->add("one", entity_addr_t()); + + MonMap *m = new MonMap; + { + m->epoch = 1; + m->last_changed = utime_t(123, 456); + + entity_addr_t empty_addr_one; + empty_addr_one.set_nonce(1); + m->add("empty_addr_one", empty_addr_one); + entity_addr_t empty_addr_two; + empty_addr_two.set_nonce(2); + m->add("empty_adrr_two", empty_addr_two); + + const char *local_pub_addr_s = "127.0.1.2"; + + const char *end_p = local_pub_addr_s + strlen(local_pub_addr_s); + entity_addr_t local_pub_addr; + local_pub_addr.parse(local_pub_addr_s, &end_p); + + m->add(mon_info_t("filled_pub_addr", local_pub_addr, 1)); + + m->add("empty_addr_zero", entity_addr_t()); + } + o.push_back(m); +} + +// read from/write to a file +int MonMap::write(const char *fn) +{ + // encode + bufferlist bl; + encode(bl, CEPH_FEATURES_ALL); + + return bl.write_file(fn); +} + +int MonMap::read(const char *fn) +{ + // read + bufferlist bl; + std::string error; + int r = bl.read_file(fn, &error); + if (r < 0) + return r; + decode(bl); + return 0; +} + +void MonMap::print_summary(ostream& out) const +{ + out << "e" << epoch << ": " + << mon_info.size() << " mons at {"; + // the map that we used to print, as it was, no longer + // maps strings to the monitor's public address, but to + // mon_info_t instead. As such, print the map in a way + // that keeps the expected format. + bool has_printed = false; + for (map::const_iterator p = mon_info.begin(); + p != mon_info.end(); + ++p) { + if (has_printed) + out << ","; + out << p->first << "=" << p->second.public_addr; + has_printed = true; + } + out << "}"; +} + +void MonMap::print(ostream& out) const +{ + out << "epoch " << epoch << "\n"; + out << "fsid " << fsid << "\n"; + out << "last_changed " << last_changed << "\n"; + out << "created " << created << "\n"; + unsigned i = 0; + for (vector::const_iterator p = ranks.begin(); + p != ranks.end(); + ++p) { + out << i++ << ": " << get_addr(*p) << " mon." << *p << "\n"; + } +} + +void MonMap::dump(Formatter *f) const +{ + f->dump_unsigned("epoch", epoch); + f->dump_stream("fsid") << fsid; + f->dump_stream("modified") << last_changed; + f->dump_stream("created") << created; + f->open_object_section("features"); + persistent_features.dump(f, "persistent"); + optional_features.dump(f, "optional"); + f->close_section(); + f->open_array_section("mons"); + int i = 0; + for (vector::const_iterator p = ranks.begin(); + p != ranks.end(); + ++p, ++i) { + f->open_object_section("mon"); + f->dump_int("rank", i); + f->dump_string("name", *p); + f->dump_stream("addr") << get_addr(*p); + f->dump_stream("public_addr") << get_addr(*p); + f->close_section(); + } + f->close_section(); +} + + +int MonMap::build_from_host_list(std::string hostlist, std::string prefix) +{ + vector addrs; + if (parse_ip_port_vec(hostlist.c_str(), addrs)) { + if (addrs.empty()) + return -ENOENT; + for (unsigned i=0; i& initial_members, + string my_name, const entity_addr_t& my_addr, + set *removed) +{ + // remove non-initial members + unsigned i = 0; + while (i < size()) { + string n = get_name(i); + if (std::find(initial_members.begin(), initial_members.end(), n) != initial_members.end()) { + lgeneric_dout(cct, 1) << " keeping " << n << " " << get_addr(i) << dendl; + i++; + continue; + } + + lgeneric_dout(cct, 1) << " removing " << get_name(i) << " " << get_addr(i) << dendl; + if (removed) + removed->insert(get_addr(i)); + remove(n); + assert(!contains(n)); + } + + // add missing initial members + for (list::iterator p = initial_members.begin(); p != initial_members.end(); ++p) { + if (!contains(*p)) { + if (*p == my_name) { + lgeneric_dout(cct, 1) << " adding self " << *p << " " << my_addr << dendl; + add(*p, my_addr); + } else { + entity_addr_t a; + a.set_type(entity_addr_t::TYPE_LEGACY); + a.set_family(AF_INET); + for (int n=1; ; n++) { + a.set_nonce(n); + if (!contains(a)) + break; + } + lgeneric_dout(cct, 1) << " adding " << *p << " " << a << dendl; + add(*p, a); + } + assert(contains(*p)); + } + } +} + + +int MonMap::build_initial(CephContext *cct, ostream& errout) +{ + const md_config_t *conf = cct->_conf; + // file? + const auto monmap = conf->get_val("monmap"); + if (!monmap.empty()) { + int r; + try { + r = read(monmap.c_str()); + } + catch (const buffer::error &e) { + r = -EINVAL; + } + if (r >= 0) + return 0; + errout << "unable to read/decode monmap from " << monmap + << ": " << cpp_strerror(-r) << std::endl; + return r; + } + + // fsid from conf? + const auto new_fsid = conf->get_val("fsid"); + if (!new_fsid.is_zero()) { + fsid = new_fsid; + } + + // -m foo? + const auto mon_host = conf->get_val("mon_host"); + if (!mon_host.empty()) { + int r = build_from_host_list(mon_host, "noname-"); + if (r < 0) { + errout << "unable to parse addrs in '" << mon_host << "'" + << std::endl; + return r; + } + created = ceph_clock_now(); + last_changed = created; + return 0; + } + + // What monitors are in the config file? + std::vector sections; + int ret = conf->get_all_sections(sections); + if (ret) { + errout << "Unable to find any monitors in the configuration " + << "file, because there was an error listing the sections. error " + << ret << std::endl; + return -ENOENT; + } + std::vector mon_names; + for (std::vector ::const_iterator s = sections.begin(); + s != sections.end(); ++s) { + if ((s->substr(0, 4) == "mon.") && (s->size() > 4)) { + mon_names.push_back(s->substr(4)); + } + } + + // Find an address for each monitor in the config file. + for (std::vector ::const_iterator m = mon_names.begin(); + m != mon_names.end(); ++m) { + std::vector sections; + std::string m_name("mon"); + m_name += "."; + m_name += *m; + sections.push_back(m_name); + sections.push_back("mon"); + sections.push_back("global"); + std::string val; + int res = conf->get_val_from_conf_file(sections, "mon addr", val, true); + if (res) { + errout << "failed to get an address for mon." << *m << ": error " + << res << std::endl; + continue; + } + entity_addr_t addr; + if (!addr.parse(val.c_str())) { + errout << "unable to parse address for mon." << *m + << ": addr='" << val << "'" << std::endl; + continue; + } + if (addr.get_port() == 0) + addr.set_port(CEPH_MON_PORT); + + uint16_t priority = 0; + if (!conf->get_val_from_conf_file(sections, "mon priority", val, false)) { + try { + priority = std::stoul(val); + } catch (std::logic_error&) { + errout << "unable to parse priority for mon." << *m + << ": priority='" << val << "'" << std::endl; + continue; + } + } + // the make sure this mon isn't already in the map + if (contains(addr)) + remove(get_name(addr)); + if (contains(*m)) + remove(*m); + + add(mon_info_t{*m, addr, priority}); + } + + if (size() == 0) { + // no info found from conf options lets try use DNS SRV records + string srv_name = conf->get_val("mon_dns_srv_name"); + string domain; + // check if domain is also provided and extract it from srv_name + size_t idx = srv_name.find("_"); + if (idx != string::npos) { + domain = srv_name.substr(idx + 1); + srv_name = srv_name.substr(0, idx); + } + + map records; + if (DNSResolver::get_instance()->resolve_srv_hosts(cct, srv_name, + DNSResolver::SRV_Protocol::TCP, domain, &records) != 0) { + + errout << "unable to get monitor info from DNS SRV with service name: " << + "ceph-mon" << std::endl; + } + else { + for (const auto& record : records) { + add(mon_info_t{record.first, + record.second.addr, + record.second.priority}); + } + } + } + + if (size() == 0) { + errout << "no monitors specified to connect to." << std::endl; + return -ENOENT; + } + created = ceph_clock_now(); + last_changed = created; + return 0; +}