X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Ftools%2Frebuild_mondb.cc;fp=src%2Fceph%2Fsrc%2Ftools%2Frebuild_mondb.cc;h=1d070fc230de1e92f2cefb96a5acabd4689bbc9c;hb=812ff6ca9fcd3e629e49d4328905f33eee8ca3f5;hp=0000000000000000000000000000000000000000;hpb=15280273faafb77777eab341909a3f495cf248d9;p=stor4nfv.git diff --git a/src/ceph/src/tools/rebuild_mondb.cc b/src/ceph/src/tools/rebuild_mondb.cc new file mode 100644 index 0000000..1d070fc --- /dev/null +++ b/src/ceph/src/tools/rebuild_mondb.cc @@ -0,0 +1,397 @@ +#include "auth/cephx/CephxKeyServer.h" +#include "common/errno.h" +#include "mon/AuthMonitor.h" +#include "mon/MonitorDBStore.h" +#include "os/ObjectStore.h" +#include "osd/OSD.h" + +static int update_auth(const string& keyring_path, + const OSDSuperblock& sb, + MonitorDBStore& ms); +static int update_monitor(const OSDSuperblock& sb, MonitorDBStore& ms); +static int update_osdmap(ObjectStore& fs, + OSDSuperblock& sb, + MonitorDBStore& ms); +static int update_pgmap_pg(ObjectStore& fs, MonitorDBStore& ms); + +int update_mon_db(ObjectStore& fs, OSDSuperblock& sb, + const string& keyring, + const string& store_path) +{ + MonitorDBStore ms(store_path); + int r = ms.create_and_open(cerr); + if (r < 0) { + cerr << "unable to open mon store: " << store_path << std::endl; + return r; + } + if ((r = update_auth(keyring, sb, ms)) < 0) { + goto out; + } + if ((r = update_osdmap(fs, sb, ms)) < 0) { + goto out; + } + if ((r = update_pgmap_pg(fs, ms)) < 0) { + goto out; + } + if ((r = update_monitor(sb, ms)) < 0) { + goto out; + } + out: + ms.close(); + return r; +} + +static void add_auth(KeyServerData::Incremental& auth_inc, + MonitorDBStore& ms) +{ + AuthMonitor::Incremental inc; + inc.inc_type = AuthMonitor::AUTH_DATA; + ::encode(auth_inc, inc.auth_data); + inc.auth_type = CEPH_AUTH_CEPHX; + + bufferlist bl; + __u8 v = 1; + ::encode(v, bl); + inc.encode(bl, CEPH_FEATURES_ALL); + + const string prefix("auth"); + auto last_committed = ms.get(prefix, "last_committed") + 1; + auto t = make_shared(); + t->put(prefix, last_committed, bl); + t->put(prefix, "last_committed", last_committed); + auto first_committed = ms.get(prefix, "first_committed"); + if (!first_committed) { + t->put(prefix, "first_committed", last_committed); + } + ms.apply_transaction(t); +} + +static int get_auth_inc(const string& keyring_path, + const OSDSuperblock& sb, + KeyServerData::Incremental* auth_inc) +{ + auth_inc->op = KeyServerData::AUTH_INC_ADD; + + // get the name + EntityName entity; + // assuming the entity name of OSD is "osd." + entity.set(CEPH_ENTITY_TYPE_OSD, std::to_string(sb.whoami)); + auth_inc->name = entity; + + // read keyring from disk + KeyRing keyring; + { + bufferlist bl; + string error; + int r = bl.read_file(keyring_path.c_str(), &error); + if (r < 0) { + if (r == -ENOENT) { + cout << "ignoring keyring (" << keyring_path << ")" + << ": " << error << std::endl; + return 0; + } else { + cerr << "unable to read keyring (" << keyring_path << ")" + << ": " << error << std::endl; + return r; + } + } else if (bl.length() == 0) { + cout << "ignoring empty keyring: " << keyring_path << std::endl; + return 0; + } + auto bp = bl.begin(); + try { + ::decode(keyring, bp); + } catch (const buffer::error& e) { + cerr << "error decoding keyring: " << keyring_path << std::endl; + return -EINVAL; + } + } + + // get the key + EntityAuth new_inc; + if (!keyring.get_auth(auth_inc->name, new_inc)) { + cerr << "key for " << auth_inc->name << " not found in keyring: " + << keyring_path << std::endl; + return -EINVAL; + } + auth_inc->auth.key = new_inc.key; + + // get the caps + map caps; + if (new_inc.caps.empty()) { + // fallback to default caps for an OSD + // osd 'allow *' mon 'allow rwx' + // as suggested by document. + ::encode(string("allow *"), caps["osd"]); + ::encode(string("allow rwx"), caps["mon"]); + } else { + caps = new_inc.caps; + } + auth_inc->auth.caps = caps; + return 0; +} + +// rebuild +// - auth/${epoch} +// - auth/first_committed +// - auth/last_committed +static int update_auth(const string& keyring_path, + const OSDSuperblock& sb, + MonitorDBStore& ms) +{ + // stolen from AuthMonitor::prepare_command(), where prefix is "auth add" + KeyServerData::Incremental auth_inc; + int r; + if ((r = get_auth_inc(keyring_path, sb, &auth_inc))) { + return r; + } + add_auth(auth_inc, ms); + return 0; +} + +// stolen from Monitor::check_fsid() +static int check_fsid(const uuid_d& fsid, MonitorDBStore& ms) +{ + bufferlist bl; + int r = ms.get("monitor", "cluster_uuid", bl); + if (r == -ENOENT) + return r; + string uuid(bl.c_str(), bl.length()); + auto end = uuid.find_first_of('\n'); + if (end != uuid.npos) { + uuid.resize(end); + } + uuid_d existing; + if (!existing.parse(uuid.c_str())) { + cerr << "error: unable to parse uuid" << std::endl; + return -EINVAL; + } + if (fsid != existing) { + cerr << "error: cluster_uuid " << existing << " != " << fsid << std::endl; + return -EEXIST; + } + return 0; +} + +// rebuild +// - monitor/cluster_uuid +int update_monitor(const OSDSuperblock& sb, MonitorDBStore& ms) +{ + switch (check_fsid(sb.cluster_fsid, ms)) { + case -ENOENT: + break; + case -EINVAL: + return -EINVAL; + case -EEXIST: + return -EEXIST; + case 0: + return 0; + default: + ceph_abort(); + } + string uuid = stringify(sb.cluster_fsid) + "\n"; + bufferlist bl; + bl.append(uuid); + auto t = make_shared(); + t->put("monitor", "cluster_uuid", bl); + ms.apply_transaction(t); + return 0; +} + +// rebuild +// - osdmap/${epoch} +// - osdmap/full_${epoch} +// - osdmap/full_latest +// - osdmap/first_committed +// - osdmap/last_committed +int update_osdmap(ObjectStore& fs, OSDSuperblock& sb, MonitorDBStore& ms) +{ + const string prefix("osdmap"); + const string first_committed_name("first_committed"); + const string last_committed_name("last_committed"); + epoch_t first_committed = ms.get(prefix, first_committed_name); + epoch_t last_committed = ms.get(prefix, last_committed_name); + auto t = make_shared(); + + // trim stale maps + unsigned ntrimmed = 0; + // osdmap starts at 1. if we have a "0" first_committed, then there is nothing + // to trim. and "1 osdmaps trimmed" in the output message is misleading. so + // let's make it an exception. + for (auto e = first_committed; first_committed && e < sb.oldest_map; e++) { + t->erase(prefix, e); + t->erase(prefix, ms.combine_strings("full", e)); + ntrimmed++; + } + // make sure we have a non-zero first_committed. OSDMonitor relies on this. + // because PaxosService::put_last_committed() set it to last_committed, if it + // is zero. which breaks OSDMonitor::update_from_paxos(), in which we believe + // that latest_full should always be greater than last_committed. + if (first_committed == 0 && sb.oldest_map < sb.newest_map) { + first_committed = 1; + } else if (ntrimmed) { + first_committed += ntrimmed; + } + if (first_committed) { + t->put(prefix, first_committed_name, first_committed); + ms.apply_transaction(t); + t = make_shared(); + } + + unsigned nadded = 0; + + OSDMap osdmap; + for (auto e = max(last_committed+1, sb.oldest_map); + e <= sb.newest_map; e++) { + bool have_crc = false; + uint32_t crc = -1; + uint64_t features = 0; + // add inc maps + { + const auto oid = OSD::get_inc_osdmap_pobject_name(e); + bufferlist bl; + int nread = fs.read(coll_t::meta(), oid, 0, 0, bl); + if (nread <= 0) { + cerr << "missing " << oid << std::endl; + return -EINVAL; + } + t->put(prefix, e, bl); + + OSDMap::Incremental inc; + auto p = bl.begin(); + inc.decode(p); + features = inc.encode_features | CEPH_FEATURE_RESERVED; + if (osdmap.get_epoch() && e > 1) { + if (osdmap.apply_incremental(inc)) { + cerr << "bad fsid: " + << osdmap.get_fsid() << " != " << inc.fsid << std::endl; + return -EINVAL; + } + have_crc = inc.have_crc; + if (inc.have_crc) { + crc = inc.full_crc; + bufferlist fbl; + osdmap.encode(fbl, features); + if (osdmap.get_crc() != inc.full_crc) { + cerr << "mismatched inc crc: " + << osdmap.get_crc() << " != " << inc.full_crc << std::endl; + return -EINVAL; + } + // inc.decode() verifies `inc_crc`, so it's been taken care of. + } + } + } + // add full maps + { + const auto oid = OSD::get_osdmap_pobject_name(e); + bufferlist bl; + int nread = fs.read(coll_t::meta(), oid, 0, 0, bl); + if (nread <= 0) { + cerr << "missing " << oid << std::endl; + return -EINVAL; + } + t->put(prefix, ms.combine_strings("full", e), bl); + + auto p = bl.begin(); + osdmap.decode(p); + if (osdmap.have_crc()) { + if (have_crc && osdmap.get_crc() != crc) { + cerr << "mismatched full/inc crc: " + << osdmap.get_crc() << " != " << crc << std::endl; + return -EINVAL; + } + uint32_t saved_crc = osdmap.get_crc(); + bufferlist fbl; + osdmap.encode(fbl, features); + if (osdmap.get_crc() != saved_crc) { + cerr << "mismatched full crc: " + << saved_crc << " != " << osdmap.get_crc() << std::endl; + return -EINVAL; + } + } + } + nadded++; + + // last_committed + t->put(prefix, last_committed_name, e); + // full last + t->put(prefix, ms.combine_strings("full", "latest"), e); + + // this number comes from the default value of osd_target_transaction_size, + // so we won't OOM or stuff too many maps in a single transaction if OSD is + // keeping a large series of osdmap + static constexpr unsigned TRANSACTION_SIZE = 30; + if (t->size() >= TRANSACTION_SIZE) { + ms.apply_transaction(t); + t = make_shared(); + } + } + if (!t->empty()) { + ms.apply_transaction(t); + } + t.reset(); + + string osd_name("osd."); + osd_name += std::to_string(sb.whoami); + cout << std::left << setw(8) + << osd_name << ": " + << ntrimmed << " osdmaps trimmed, " + << nadded << " osdmaps added." << std::endl; + return 0; +} + +// rebuild +// - pgmap_pg/${pgid} +int update_pgmap_pg(ObjectStore& fs, MonitorDBStore& ms) +{ + // pgmap/${epoch} is the incremental of: stamp, pgmap_pg, pgmap_osd + // if PGMonitor fails to read it, it will fall back to the pgmap_pg, i.e. + // the fullmap. + vector collections; + int r = fs.list_collections(collections); + if (r < 0) { + cerr << "failed to list pgs: " << cpp_strerror(r) << std::endl; + return r; + } + const string prefix("pgmap_pg"); + // in general, there are less than 100 PGs per OSD, so no need to apply + // transaction in batch. + auto t = make_shared(); + unsigned npg = 0; + for (const auto& coll : collections) { + spg_t pgid; + if (!coll.is_pg(&pgid)) + continue; + bufferlist bl; + pg_info_t info(pgid); + PastIntervals past_intervals; + __u8 struct_v; + r = PG::read_info(&fs, pgid, coll, bl, info, past_intervals, struct_v); + if (r < 0) { + cerr << "failed to read_info: " << cpp_strerror(r) << std::endl; + return r; + } + if (struct_v < PG::cur_struct_v) { + cerr << "incompatible pg_info: v" << struct_v << std::endl; + return -EINVAL; + } + version_t latest_epoch = 0; + r = ms.get(prefix, stringify(pgid.pgid), bl); + if (r >= 0) { + pg_stat_t pg_stat; + auto bp = bl.begin(); + ::decode(pg_stat, bp); + latest_epoch = pg_stat.reported_epoch; + } + if (info.stats.reported_epoch > latest_epoch) { + bufferlist bl; + ::encode(info.stats, bl); + t->put(prefix, stringify(pgid.pgid), bl); + npg++; + } + } + ms.apply_transaction(t); + cout << std::left << setw(10) + << " " << npg << " pgs added." << std::endl; + return 0; +}