9 #include "common/Formatter.h"
11 #include "include/ceph_features.h"
12 #include "include/addr_parsing.h"
13 #include "common/ceph_argparse.h"
14 #include "common/dns_resolve.h"
15 #include "common/errno.h"
17 #include "common/dout.h"
19 using ceph::Formatter;
// Serializes this mon_info_t into bl.
// The payload is opened with ENCODE_START(2, 1, bl); the two numbers are
// presumably the struct version and the oldest compatible version
// (TODO confirm against the macro definition).
// features is forwarded only to the public_addr encoder; priority is
// encoded without it.
21 void mon_info_t::encode(bufferlist& bl, uint64_t features) const
23 ENCODE_START(2, 1, bl);
25 ::encode(public_addr, bl, features);
26 ::encode(priority, bl);
// Deserializes a mon_info_t from the buffer iterator p.
// public_addr is read before priority, the same relative order in which
// mon_info_t::encode() writes them.
30 void mon_info_t::decode(bufferlist::iterator& p)
34 ::decode(public_addr, p);
36 ::decode(priority, p);
// Streams a human-readable description of this monitor to out; the
// output contains " public <public_addr>" followed by
// " priority <priority>".
41 void mon_info_t::print(ostream& out) const
44 << " public " << public_addr
45 << " priority " << priority;
// Reconciles the mon_info map with o, a monitor name -> address map.
// When mon_info already has entries its size must equal o.size(), and the
// asserts below check that an entry of o has a mon_info counterpart with
// the same name and the same public address. The address from o is also
// written into the matching mon_info entry (see the last two statements).
// Every check is an assert(), so a mismatch aborts rather than returning
// an error.
48 void MonMap::sanitize_mons(map<string,entity_addr_t>& o)
50 // if mon_info is populated, it means we decoded a map encoded
51 // by someone who understands the new format (i.e., is able to
52 // encode 'mon_info'). This means they must also have provided
53 // a properly populated 'mon_addr' (which we have dropped with
54 // this patch), 'o' being the contents of said map. In this
55 // case, 'o' must have the same number of entries as 'mon_info'.
57 // Also, for each entry in 'o', there has to be a matching
58 // 'mon_info' entry, properly populated with a name and a matching
61 // OTOH, if 'mon_info' is not populated, it means the one that
62 // originally encoded the map does not know the new format, and
63 // 'o' will be our only source of info about the monitors in the
64 // cluster -- and we will use it to populate our 'mon_info' map.
66 bool has_mon_info = false;
67 if (mon_info.size() > 0) {
68 assert(o.size() == mon_info.size());
74 // make sure the info we have is accurate
75 assert(mon_info.count(p.first));
76 assert(mon_info[p.first].name == p.first);
77 assert(mon_info[p.first].public_addr == p.second);
// map::operator[] default-constructs the entry when the key is absent,
// so this lookup can also create the mon_info record for p.first.
79 mon_info_t &m = mon_info[p.first];
81 m.public_addr = p.second;
// Ordering predicate over mon_info_t: the primary key is public_addr and
// ties on the address are broken by comparing name. Returns true when a
// sorts before b.
88 bool operator()(const mon_info_t &a, const mon_info_t &b) const {
89 if (a.public_addr == b.public_addr)
90 return a.name < b.name;
91 return a.public_addr < b.public_addr;
// Recomputes the state derived from mon_info: ranks is resized to one
// slot per monitor and addr_mons is filled with public_addr -> name.
// The monitors are ordered through a temporary set that uses rank_cmp.
96 void MonMap::calc_ranks() {
98 ranks.resize(mon_info.size());
101 // Used to order entries according to public_addr, because that's
102 // how the ranks are expected to be ordered by. We may expand this
103 // later on, according to some other criteria, by specifying a
104 // different comparator.
106 // Please note that we use a 'set' here instead of resorting to
107 // std::sort() because we need more info than that's available in
108 // the vector. The vector will thus be ordered by, e.g., public_addr
109 // while only containing the names of each individual monitor.
110 // The only way of achieving this with std::sort() would be to first
111 // insert every mon_info_t entry into a vector 'foo', std::sort() 'foo'
112 // with custom comparison functions, and then copy each individual entry
113 // to a new vector. Unless there's a simpler way, we don't think the
114 // added complexity makes up for the additional memory usage of a 'set'.
115 set<mon_info_t, rank_cmp> tmp;
117 for (map<string,mon_info_t>::iterator p = mon_info.begin();
120 mon_info_t &m = p->second;
123 // populate addr_mons
// Each public address may be registered only once; a duplicate aborts.
124 assert(addr_mons.count(m.public_addr) == 0);
125 addr_mons[m.public_addr] = m.name;
128 // map the set to the actual ranks etc
// NOTE(review): tmp is declared as set<mon_info_t, rank_cmp> while the
// iterator below is spelled set<mon_info_t>::iterator; consider using the
// matching type (or auto) so this does not depend on the two iterator
// types being interchangeable.
130 for (set<mon_info_t>::iterator p = tmp.begin();
// Serializes the monmap into blist. The layout depends on the feature bits
// in con_features:
//  - CEPH_FEATURE_MONNAMES not set: fsid, epoch, a vector<entity_inst_t>
//    filled with get_inst(n) for every index n, last_changed, created;
//  - CEPH_FEATURE_MONENC not set: fsid, epoch, the mon_addr map,
//    last_changed, created;
//  - the remaining case: a payload opened with ENCODE_START(5, 3, blist)
//    that additionally carries persistent_features, optional_features and
//    mon_info, closed with ENCODE_FINISH.
137 void MonMap::encode(bufferlist& blist, uint64_t con_features) const
139 /* we keep the mon_addr map when encoding to ensure compatibility
140 * with clients and other monitors that do not yet support the 'mons'
141 * map. This map keeps its original behavior, containing a mapping of
142 * monitor id (i.e., 'foo' in 'mon.foo') to the monitor's public
143 * address -- which is obtained from the public address of each entry
146 map<string,entity_addr_t> mon_addr;
147 for (map<string,mon_info_t>::const_iterator p = mon_info.begin();
150 mon_addr[p->first] = p->second.public_addr;
153 if ((con_features & CEPH_FEATURE_MONNAMES) == 0) {
156 ::encode_raw(fsid, blist);
157 ::encode(epoch, blist);
158 vector<entity_inst_t> mon_inst(mon_addr.size());
159 for (unsigned n = 0; n < mon_addr.size(); n++)
160 mon_inst[n] = get_inst(n);
161 ::encode(mon_inst, blist, con_features);
162 ::encode(last_changed, blist);
163 ::encode(created, blist);
167 if ((con_features & CEPH_FEATURE_MONENC) == 0) {
170 ::encode_raw(fsid, blist);
171 ::encode(epoch, blist);
172 ::encode(mon_addr, blist, con_features);
173 ::encode(last_changed, blist);
174 ::encode(created, blist);
177 ENCODE_START(5, 3, blist);
178 ::encode_raw(fsid, blist);
179 ::encode(epoch, blist);
180 ::encode(mon_addr, blist, con_features);
181 ::encode(last_changed, blist);
182 ::encode(created, blist);
183 ::encode(persistent_features, blist);
184 ::encode(optional_features, blist);
185 // this supersedes 'mon_addr'
186 ::encode(mon_info, blist, con_features);
187 ENCODE_FINISH(blist);
// Deserializes a monmap from the buffer iterator p.
// Monitor addresses are first gathered in a local name -> address map,
// either converted from a decoded vector<entity_inst_t> or decoded as a
// map directly; which path runs presumably depends on the decoded struct
// version (TODO confirm). The local map is finally handed to
// sanitize_mons() to reconcile it with mon_info.
190 void MonMap::decode(bufferlist::iterator &p)
192 map<string,entity_addr_t> mon_addr;
193 DECODE_START_LEGACY_COMPAT_LEN_16(5, 3, 3, p);
194 ::decode_raw(fsid, p);
197 vector<entity_inst_t> mon_inst;
198 ::decode(mon_inst, p);
199 for (unsigned i = 0; i < mon_inst.size(); i++) {
204 mon_addr[name] = mon_inst[i].addr;
207 ::decode(mon_addr, p);
209 ::decode(last_changed, p);
210 ::decode(created, p);
212 ::decode(persistent_features, p);
213 ::decode(optional_features, p);
216 ::decode(mon_info, p);
218 // we may be decoding to an existing monmap; if we do not
219 // clear the mon_info map now, we will likely run into problems
220 // later on in MonMap::sanitize_mons()
224 sanitize_mons(mon_addr);
// Appends heap-allocated sample MonMap objects to o (presumably consumed
// by encode/decode round-trip tests -- TODO confirm against the caller).
// The list receives raw pointers created with new; nothing here frees
// them.
228 void MonMap::generate_test_instances(list<MonMap*>& o)
230 o.push_back(new MonMap);
231 o.push_back(new MonMap);
233 o.back()->last_changed = utime_t(123, 456);
234 o.back()->created = utime_t(789, 101112);
235 o.back()->add("one", entity_addr_t());
237 MonMap *m = new MonMap;
240 m->last_changed = utime_t(123, 456);
// Two otherwise default-constructed addresses that differ only in nonce.
242 entity_addr_t empty_addr_one;
243 empty_addr_one.set_nonce(1);
244 m->add("empty_addr_one", empty_addr_one);
245 entity_addr_t empty_addr_two;
246 empty_addr_two.set_nonce(2);
// NOTE(review): "empty_adrr_two" looks like a typo of "empty_addr_two";
// it is left untouched here because renaming changes the generated
// instance.
247 m->add("empty_adrr_two", empty_addr_two);
249 const char *local_pub_addr_s = "127.0.1.2";
// end_p starts at the terminating NUL of the literal and is passed to
// parse() by address.
251 const char *end_p = local_pub_addr_s + strlen(local_pub_addr_s);
252 entity_addr_t local_pub_addr;
253 local_pub_addr.parse(local_pub_addr_s, &end_p);
// A monitor with a real address, built through the mon_info_t constructor
// (third argument 1).
255 m->add(mon_info_t("filled_pub_addr", local_pub_addr, 1));
257 m->add("empty_addr_zero", entity_addr_t());
262 // read from/write to a file
// Encodes this map with CEPH_FEATURES_ALL and writes the resulting buffer
// to the file named fn. The return value is whatever bl.write_file()
// returns.
263 int MonMap::write(const char *fn)
267 encode(bl, CEPH_FEATURES_ALL);
269 return bl.write_file(fn);
// Loads a monmap from the file named fn. The file content is pulled into
// a buffer with bl.read_file(), which also receives an error string; r
// holds its return code.
272 int MonMap::read(const char *fn)
277 int r = bl.read_file(fn, &error);
// Writes a compact summary to out: "e<epoch>: <count> mons at {" followed
// by one "<name>=<public_addr>" item per mon_info entry.
284 void MonMap::print_summary(ostream& out) const
286 out << "e" << epoch << ": "
287 << mon_info.size() << " mons at {";
288 // the map that we used to print, as it was, no longer
289 // maps strings to the monitor's public address, but to
290 // mon_info_t instead. As such, print the map in a way
291 // that keeps the expected format.
// has_printed presumably tracks whether a separator is needed before the
// next item (TODO confirm).
292 bool has_printed = false;
293 for (map<string,mon_info_t>::const_iterator p = mon_info.begin();
298 out << p->first << "=" << p->second.public_addr;
// Writes a multi-line description to out: epoch, fsid, last_changed and
// created on separate lines, then one line per entry of ranks in the form
// "<i>: <addr> mon.<name>".
304 void MonMap::print(ostream& out) const
306 out << "epoch " << epoch << "\n";
307 out << "fsid " << fsid << "\n";
308 out << "last_changed " << last_changed << "\n";
309 out << "created " << created << "\n";
311 for (vector<string>::const_iterator p = ranks.begin();
314 out << i++ << ": " << get_addr(*p) << " mon." << *p << "\n";
// Emits the monmap through the Formatter f: epoch, fsid, modified
// (last_changed), created, a "features" object holding the persistent and
// optional feature sets, and a "mons" array with one "mon" object per
// entry of ranks.
318 void MonMap::dump(Formatter *f) const
320 f->dump_unsigned("epoch", epoch);
321 f->dump_stream("fsid") << fsid;
322 f->dump_stream("modified") << last_changed;
323 f->dump_stream("created") << created;
324 f->open_object_section("features");
325 persistent_features.dump(f, "persistent");
326 optional_features.dump(f, "optional");
328 f->open_array_section("mons");
330 for (vector<string>::const_iterator p = ranks.begin();
333 f->open_object_section("mon");
334 f->dump_int("rank", i);
335 f->dump_string("name", *p);
// "addr" and "public_addr" are both filled from get_addr(*p), so the two
// keys always carry the same value.
336 f->dump_stream("addr") << get_addr(*p);
337 f->dump_stream("public_addr") << get_addr(*p);
// Populates the map from hostlist.
// First attempt: parse hostlist directly with parse_ip_port_vec().
// Second attempt: resolve it through resolve_addrs() and parse the result.
// In both loops an address whose port is 0 gets CEPH_MON_PORT, and the
// generated monitor name starts with prefix. Addresses already contained
// in the map are filtered by the contains() checks.
344 int MonMap::build_from_host_list(std::string hostlist, std::string prefix)
346 vector<entity_addr_t> addrs;
347 if (parse_ip_port_vec(hostlist.c_str(), addrs)) {
350 for (unsigned i=0; i<addrs.size(); i++) {
// A port of 0 is replaced by the default monitor port.
354 if (addrs[i].get_port() == 0)
355 addrs[i].set_port(CEPH_MON_PORT);
356 string name = prefix;
358 if (!contains(addrs[i]))
364 // maybe they passed us a DNS-resolvable name
366 hosts = resolve_addrs(hostlist.c_str());
369 bool success = parse_ip_port_vec(hosts, addrs);
377 for (unsigned i=0; i<addrs.size(); i++) {
381 if (addrs[i].get_port() == 0)
382 addrs[i].set_port(CEPH_MON_PORT);
383 string name = prefix;
385 if (!contains(addrs[i]) &&
// Restricts the map to the monitors named in initial_members.
// Pass 1 walks the current members: names found in initial_members are
// kept, the others are logged as removed and their address is inserted
// into *removed.
// Pass 2 walks initial_members and adds the names that are missing; the
// " adding self " log line reports my_addr (presumably used when the name
// equals my_name -- TODO confirm), the other additions report a
// placeholder address a of type TYPE_LEGACY and family AF_INET.
// cct is used for logging at debug level 1.
392 void MonMap::set_initial_members(CephContext *cct,
393 list<std::string>& initial_members,
394 string my_name, const entity_addr_t& my_addr,
395 set<entity_addr_t> *removed)
397 // remove non-initial members
400 string n = get_name(i);
401 if (std::find(initial_members.begin(), initial_members.end(), n) != initial_members.end()) {
402 lgeneric_dout(cct, 1) << " keeping " << n << " " << get_addr(i) << dendl;
407 lgeneric_dout(cct, 1) << " removing " << get_name(i) << " " << get_addr(i) << dendl;
409 removed->insert(get_addr(i));
// After removal the name must no longer be present in the map.
411 assert(!contains(n));
414 // add missing initial members
415 for (list<string>::iterator p = initial_members.begin(); p != initial_members.end(); ++p) {
418 lgeneric_dout(cct, 1) << " adding self " << *p << " " << my_addr << dendl;
422 a.set_type(entity_addr_t::TYPE_LEGACY);
423 a.set_family(AF_INET);
// Open-ended counter loop; presumably it searches for a value of n that
// yields an address not yet in the map (TODO confirm).
424 for (int n=1; ; n++) {
429 lgeneric_dout(cct, 1) << " adding " << *p << " " << a << dendl;
// Every initial member must be present once this iteration is done.
432 assert(contains(*p));
// Builds the initial monmap from the configuration reachable through cct;
// diagnostics are written to errout.
// Sources, in the order they appear below:
//  1. the "monmap" option: a monmap file loaded with read();
//  2. the "mon_host" option: parsed with build_from_host_list() using the
//     name prefix "noname-";
//  3. the config file: one monitor per "mon.<id>" section, using its
//     "mon addr" and optional "mon priority" values;
//  4. DNS SRV records named by the "mon_dns_srv_name" option.
// created and last_changed are stamped with ceph_clock_now() where shown.
438 int MonMap::build_initial(CephContext *cct, ostream& errout)
440 const md_config_t *conf = cct->_conf;
442 const auto monmap = conf->get_val<std::string>("monmap");
443 if (!monmap.empty()) {
446 r = read(monmap.c_str());
448 catch (const buffer::error &e) {
453 errout << "unable to read/decode monmap from " << monmap
454 << ": " << cpp_strerror(-r) << std::endl;
459 const auto new_fsid = conf->get_val<uuid_d>("fsid");
460 if (!new_fsid.is_zero()) {
465 const auto mon_host = conf->get_val<std::string>("mon_host");
466 if (!mon_host.empty()) {
467 int r = build_from_host_list(mon_host, "noname-");
469 errout << "unable to parse addrs in '" << mon_host << "'"
473 created = ceph_clock_now();
474 last_changed = created;
478 // What monitors are in the config file?
479 std::vector <std::string> sections;
480 int ret = conf->get_all_sections(sections);
482 errout << "Unable to find any monitors in the configuration "
483 << "file, because there was an error listing the sections. error "
487 std::vector <std::string> mon_names;
488 for (std::vector <std::string>::const_iterator s = sections.begin();
489 s != sections.end(); ++s) {
// Keep only sections named "mon.<id>" with a non-empty id; the id (the
// text after "mon.") becomes the monitor name.
490 if ((s->substr(0, 4) == "mon.") && (s->size() > 4)) {
491 mon_names.push_back(s->substr(4));
495 // Find an address for each monitor in the config file.
496 for (std::vector <std::string>::const_iterator m = mon_names.begin();
497 m != mon_names.end(); ++m) {
// This inner sections vector shadows the outer one; it lists the config
// sections searched for this monitor: the per-monitor section name, then
// "mon", then "global".
498 std::vector <std::string> sections;
499 std::string m_name("mon");
502 sections.push_back(m_name);
503 sections.push_back("mon");
504 sections.push_back("global");
506 int res = conf->get_val_from_conf_file(sections, "mon addr", val, true);
508 errout << "failed to get an address for mon." << *m << ": error "
513 if (!addr.parse(val.c_str())) {
514 errout << "unable to parse address for mon." << *m
515 << ": addr='" << val << "'" << std::endl;
// A port of 0 is replaced by the default monitor port.
518 if (addr.get_port() == 0)
519 addr.set_port(CEPH_MON_PORT);
521 uint16_t priority = 0;
522 if (!conf->get_val_from_conf_file(sections, "mon priority", val, false)) {
// std::stoul reports failures with std::invalid_argument or
// std::out_of_range, both derived from std::logic_error.
// NOTE(review): the unsigned long result is assigned to a uint16_t, so a
// configured value above 65535 is silently truncated.
524 priority = std::stoul(val);
525 } catch (std::logic_error&) {
526 errout << "unable to parse priority for mon." << *m
527 << ": priority='" << val << "'" << std::endl;
531 // make sure this mon isn't already in the map
533 remove(get_name(addr));
537 add(mon_info_t{*m, addr, priority});
541 // no info found from conf options; let's try to use DNS SRV records
542 string srv_name = conf->get_val<std::string>("mon_dns_srv_name");
544 // check if domain is also provided and extract it from srv_name
// The option value is split at its first underscore: the text before it is
// the service name, the text after it is the domain.
545 size_t idx = srv_name.find("_");
546 if (idx != string::npos) {
547 domain = srv_name.substr(idx + 1);
548 srv_name = srv_name.substr(0, idx);
551 map<string, DNSResolver::Record> records;
552 if (DNSResolver::get_instance()->resolve_srv_hosts(cct, srv_name,
553 DNSResolver::SRV_Protocol::TCP, domain, &records) != 0) {
// NOTE(review): the message prints the literal "ceph-mon" even though the
// lookup above used srv_name; consider reporting srv_name instead.
555 errout << "unable to get monitor info from DNS SRV with service name: " <<
556 "ceph-mon" << std::endl;
559 for (const auto& record : records) {
560 add(mon_info_t{record.first,
562 record.second.priority});
568 errout << "no monitors specified to connect to." << std::endl;
571 created = ceph_clock_now();
572 last_changed = created;