X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Fcrush%2FCrushWrapper.cc;fp=src%2Fceph%2Fsrc%2Fcrush%2FCrushWrapper.cc;h=0000000000000000000000000000000000000000;hb=7da45d65be36d36b880cc55c5036e96c24b53f00;hp=52af91f6f47d1f9c9815dd3f112b659c9878a8aa;hpb=691462d09d0987b47e112d6ee8740375df3c51b2;p=stor4nfv.git diff --git a/src/ceph/src/crush/CrushWrapper.cc b/src/ceph/src/crush/CrushWrapper.cc deleted file mode 100644 index 52af91f..0000000 --- a/src/ceph/src/crush/CrushWrapper.cc +++ /dev/null @@ -1,3382 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include "osd/osd_types.h" -#include "common/debug.h" -#include "common/Formatter.h" -#include "common/errno.h" -#include "common/TextTable.h" -#include "include/stringify.h" - -#include "CrushWrapper.h" -#include "CrushTreeDumper.h" - -#define dout_subsys ceph_subsys_crush - -bool CrushWrapper::has_legacy_rule_ids() const -{ - for (unsigned i=0; imax_rules; i++) { - crush_rule *r = crush->rules[i]; - if (r && - r->mask.ruleset != i) { - return true; - } - } - return false; -} - -std::map CrushWrapper::renumber_rules() -{ - std::map result; - for (unsigned i=0; imax_rules; i++) { - crush_rule *r = crush->rules[i]; - if (r && r->mask.ruleset != i) { - result[r->mask.ruleset] = i; - r->mask.ruleset = i; - } - } - return result; -} - -bool CrushWrapper::has_non_straw2_buckets() const -{ - for (int i=0; imax_buckets; ++i) { - crush_bucket *b = crush->buckets[i]; - if (!b) - continue; - if (b->alg != CRUSH_BUCKET_STRAW2) - return true; - } - return false; -} - -bool CrushWrapper::has_v2_rules() const -{ - for (unsigned i=0; imax_rules; i++) { - if (is_v2_rule(i)) { - return true; - } - } - return false; -} - -bool CrushWrapper::is_v2_rule(unsigned ruleid) const -{ - // check rule for use of indep or new SET_* rule steps - if (ruleid >= crush->max_rules) - return false; - crush_rule *r = crush->rules[ruleid]; - if (!r) - return false; - for (unsigned j=0; jlen; j++) { - if (r->steps[j].op == CRUSH_RULE_CHOOSE_INDEP || - r->steps[j].op == CRUSH_RULE_CHOOSELEAF_INDEP || - r->steps[j].op == CRUSH_RULE_SET_CHOOSE_TRIES || - r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_TRIES) { - return true; - } - } - return false; -} - -bool CrushWrapper::has_v3_rules() const -{ - for (unsigned i=0; imax_rules; i++) { - if (is_v3_rule(i)) { - return true; - } - } - return false; -} - -bool CrushWrapper::is_v3_rule(unsigned ruleid) const -{ - // check rule for use of SET_CHOOSELEAF_VARY_R step - if (ruleid >= crush->max_rules) - return false; - crush_rule *r = crush->rules[ruleid]; - if (!r) - return false; - for (unsigned j=0; jlen; j++) { - if (r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_VARY_R) { - return true; - } - } - return false; -} - -bool CrushWrapper::has_v4_buckets() const -{ - for (int i=0; imax_buckets; ++i) { - crush_bucket *b = crush->buckets[i]; - if (!b) - continue; - if (b->alg == CRUSH_BUCKET_STRAW2) - return true; - } - return false; -} - -bool CrushWrapper::has_v5_rules() const -{ - for (unsigned i=0; imax_rules; i++) { - if (is_v5_rule(i)) { - return true; - } - } - return false; -} - -bool CrushWrapper::is_v5_rule(unsigned ruleid) const -{ - // check rule for use of SET_CHOOSELEAF_STABLE step - if (ruleid >= crush->max_rules) - return false; - crush_rule *r = crush->rules[ruleid]; - if (!r) - return false; - for (unsigned j=0; jlen; j++) { - if (r->steps[j].op == CRUSH_RULE_SET_CHOOSELEAF_STABLE) { - return true; - } - } - return false; -} - -bool CrushWrapper::has_choose_args() const -{ - return !choose_args.empty(); -} - -bool CrushWrapper::has_incompat_choose_args() const -{ - if (choose_args.empty()) - return false; - if (choose_args.size() > 1) - return true; - if (choose_args.begin()->first != DEFAULT_CHOOSE_ARGS) - return true; - crush_choose_arg_map arg_map = choose_args.begin()->second; - for (__u32 i = 0; i < arg_map.size; i++) { - crush_choose_arg *arg = &arg_map.args[i]; - if (arg->weight_set_size == 0 && - arg->ids_size == 0) - continue; - if (arg->weight_set_size != 1) - return true; - if (arg->ids_size != 0) - return true; - } - return false; -} - -int CrushWrapper::split_id_class(int i, int *idout, int *classout) const -{ - if (!item_exists(i)) - return -EINVAL; - string name = get_item_name(i); - size_t pos = name.find("~"); - if (pos == string::npos) { - *idout = i; - *classout = -1; - return 0; - } - string name_no_class = name.substr(0, pos); - if (!name_exists(name_no_class)) - return -ENOENT; - string class_name = name.substr(pos + 1); - if (!class_exists(class_name)) - return -ENOENT; - *idout = get_item_id(name_no_class); - *classout = get_class_id(class_name); - return 0; -} - -int CrushWrapper::can_rename_item(const string& srcname, - const string& dstname, - ostream *ss) const -{ - if (name_exists(srcname)) { - if (name_exists(dstname)) { - *ss << "dstname = '" << dstname << "' already exists"; - return -EEXIST; - } - if (is_valid_crush_name(dstname)) { - return 0; - } else { - *ss << "dstname = '" << dstname << "' does not match [-_.0-9a-zA-Z]+"; - return -EINVAL; - } - } else { - if (name_exists(dstname)) { - *ss << "srcname = '" << srcname << "' does not exist " - << "and dstname = '" << dstname << "' already exists"; - return -EALREADY; - } else { - *ss << "srcname = '" << srcname << "' does not exist"; - return -ENOENT; - } - } -} - -int CrushWrapper::rename_item(const string& srcname, - const string& dstname, - ostream *ss) -{ - int ret = can_rename_item(srcname, dstname, ss); - if (ret < 0) - return ret; - int oldid = get_item_id(srcname); - return set_item_name(oldid, dstname); -} - -int CrushWrapper::can_rename_bucket(const string& srcname, - const string& dstname, - ostream *ss) const -{ - int ret = can_rename_item(srcname, dstname, ss); - if (ret) - return ret; - int srcid = get_item_id(srcname); - if (srcid >= 0) { - *ss << "srcname = '" << srcname << "' is not a bucket " - << "because its id = " << srcid << " is >= 0"; - return -ENOTDIR; - } - return 0; -} - -int CrushWrapper::rename_bucket(const string& srcname, - const string& dstname, - ostream *ss) -{ - int ret = can_rename_bucket(srcname, dstname, ss); - if (ret < 0) - return ret; - int oldid = get_item_id(srcname); - return set_item_name(oldid, dstname); -} - -int CrushWrapper::rename_rule(const string& srcname, - const string& dstname, - ostream *ss) -{ - if (!rule_exists(srcname)) { - if (ss) { - *ss << "source rule name '" << srcname << "' does not exist"; - } - return -ENOENT; - } - if (rule_exists(dstname)) { - if (ss) { - *ss << "destination rule name '" << dstname << "' already exists"; - } - return -EEXIST; - } - int rule_id = get_rule_id(srcname); - auto it = rule_name_map.find(rule_id); - assert(it != rule_name_map.end()); - it->second = dstname; - if (have_rmaps) { - rule_name_rmap.erase(srcname); - rule_name_rmap[dstname] = rule_id; - } - return 0; -} - -void CrushWrapper::find_takes(set *roots) const -{ - for (unsigned i=0; imax_rules; i++) { - crush_rule *r = crush->rules[i]; - if (!r) - continue; - for (unsigned j=0; jlen; j++) { - if (r->steps[j].op == CRUSH_RULE_TAKE) - roots->insert(r->steps[j].arg1); - } - } -} - -void CrushWrapper::find_roots(set *roots) const -{ - for (int i = 0; i < crush->max_buckets; i++) { - if (!crush->buckets[i]) - continue; - crush_bucket *b = crush->buckets[i]; - if (!_search_item_exists(b->id)) - roots->insert(b->id); - } -} - -bool CrushWrapper::subtree_contains(int root, int item) const -{ - if (root == item) - return true; - - if (root >= 0) - return false; // root is a leaf - - const crush_bucket *b = get_bucket(root); - if (IS_ERR(b)) - return false; - - for (unsigned j=0; jsize; j++) { - if (subtree_contains(b->items[j], item)) - return true; - } - return false; -} - -bool CrushWrapper::_maybe_remove_last_instance(CephContext *cct, int item, bool unlink_only) -{ - // last instance? - if (_search_item_exists(item)) { - return false; - } - if (item < 0 && _bucket_is_in_use(item)) { - return false; - } - - if (item < 0 && !unlink_only) { - crush_bucket *t = get_bucket(item); - ldout(cct, 5) << "_maybe_remove_last_instance removing bucket " << item << dendl; - crush_remove_bucket(crush, t); - if (class_bucket.count(item) != 0) - class_bucket.erase(item); - class_remove_item(item); - } - if ((item >= 0 || !unlink_only) && name_map.count(item)) { - ldout(cct, 5) << "_maybe_remove_last_instance removing name for item " << item << dendl; - name_map.erase(item); - have_rmaps = false; - if (item >= 0 && !unlink_only) { - class_remove_item(item); - } - } - rebuild_roots_with_classes(); - return true; -} - -int CrushWrapper::remove_root(int item) -{ - crush_bucket *b = get_bucket(item); - if (IS_ERR(b)) { - // should be idempotent - // e.g.: we use 'crush link' to link same host into - // different roots, which as a result can cause different - // shadow trees reference same hosts too. This means - // we may need to destory the same buckets(hosts, racks, etc.) - // multiple times during rebuilding all shadow trees. - return 0; - } - - for (unsigned n = 0; n < b->size; n++) { - if (b->items[n] >= 0) - continue; - int r = remove_root(b->items[n]); - if (r < 0) - return r; - } - - crush_remove_bucket(crush, b); - if (name_map.count(item) != 0) { - name_map.erase(item); - have_rmaps = false; - } - if (class_bucket.count(item) != 0) - class_bucket.erase(item); - class_remove_item(item); - return 0; -} - -int CrushWrapper::remove_item(CephContext *cct, int item, bool unlink_only) -{ - ldout(cct, 5) << "remove_item " << item - << (unlink_only ? " unlink_only":"") << dendl; - - int ret = -ENOENT; - - if (item < 0 && !unlink_only) { - crush_bucket *t = get_bucket(item); - if (IS_ERR(t)) { - ldout(cct, 1) << "remove_item bucket " << item << " does not exist" - << dendl; - return -ENOENT; - } - - if (t->size) { - ldout(cct, 1) << "remove_item bucket " << item << " has " << t->size - << " items, not empty" << dendl; - return -ENOTEMPTY; - } - if (_bucket_is_in_use(item)) { - return -EBUSY; - } - } - - for (int i = 0; i < crush->max_buckets; i++) { - if (!crush->buckets[i]) - continue; - crush_bucket *b = crush->buckets[i]; - - for (unsigned i=0; isize; ++i) { - int id = b->items[i]; - if (id == item) { - ldout(cct, 5) << "remove_item removing item " << item - << " from bucket " << b->id << dendl; - for (auto& p : choose_args) { - // weight down each weight-set to 0 before we remove the item - vector weightv(get_choose_args_positions(p.second), 0); - choose_args_adjust_item_weight(cct, p.second, item, weightv, nullptr); - } - bucket_remove_item(b, item); - adjust_item_weight(cct, b->id, b->weight); - ret = 0; - } - } - } - - if (_maybe_remove_last_instance(cct, item, unlink_only)) - ret = 0; - - return ret; -} - -bool CrushWrapper::_search_item_exists(int item) const -{ - for (int i = 0; i < crush->max_buckets; i++) { - if (!crush->buckets[i]) - continue; - crush_bucket *b = crush->buckets[i]; - for (unsigned j=0; jsize; ++j) { - if (b->items[j] == item) - return true; - } - } - return false; -} - -bool CrushWrapper::_bucket_is_in_use(int item) -{ - for (auto &i : class_bucket) - for (auto &j : i.second) - if (j.second == item) - return true; - for (unsigned i = 0; i < crush->max_rules; ++i) { - crush_rule *r = crush->rules[i]; - if (!r) - continue; - for (unsigned j = 0; j < r->len; ++j) { - if (r->steps[j].op == CRUSH_RULE_TAKE) { - int step_item = r->steps[j].arg1; - int original_item; - int c; - int res = split_id_class(step_item, &original_item, &c); - if (res < 0) - return false; - if (step_item == item || original_item == item) - return true; - } - } - } - return false; -} - -int CrushWrapper::_remove_item_under( - CephContext *cct, int item, int ancestor, bool unlink_only) -{ - ldout(cct, 5) << "_remove_item_under " << item << " under " << ancestor - << (unlink_only ? " unlink_only":"") << dendl; - - if (ancestor >= 0) { - return -EINVAL; - } - - if (!bucket_exists(ancestor)) - return -EINVAL; - - int ret = -ENOENT; - - crush_bucket *b = get_bucket(ancestor); - for (unsigned i=0; isize; ++i) { - int id = b->items[i]; - if (id == item) { - ldout(cct, 5) << "_remove_item_under removing item " << item - << " from bucket " << b->id << dendl; - for (auto& p : choose_args) { - // weight down each weight-set to 0 before we remove the item - vector weightv(get_choose_args_positions(p.second), 0); - _choose_args_adjust_item_weight_in_bucket( - cct, p.second, b->id, item, weightv, nullptr); - } - bucket_remove_item(b, item); - adjust_item_weight(cct, b->id, b->weight); - ret = 0; - } else if (id < 0) { - int r = remove_item_under(cct, item, id, unlink_only); - if (r == 0) - ret = 0; - } - } - return ret; -} - -int CrushWrapper::remove_item_under( - CephContext *cct, int item, int ancestor, bool unlink_only) -{ - ldout(cct, 5) << "remove_item_under " << item << " under " << ancestor - << (unlink_only ? " unlink_only":"") << dendl; - - if (!unlink_only && _bucket_is_in_use(item)) { - return -EBUSY; - } - - int ret = _remove_item_under(cct, item, ancestor, unlink_only); - if (ret < 0) - return ret; - - if (item < 0 && !unlink_only) { - crush_bucket *t = get_bucket(item); - if (IS_ERR(t)) { - ldout(cct, 1) << "remove_item_under bucket " << item - << " does not exist" << dendl; - return -ENOENT; - } - - if (t->size) { - ldout(cct, 1) << "remove_item_under bucket " << item << " has " << t->size - << " items, not empty" << dendl; - return -ENOTEMPTY; - } - } - - if (_maybe_remove_last_instance(cct, item, unlink_only)) - ret = 0; - - return ret; -} - -int CrushWrapper::get_common_ancestor_distance(CephContext *cct, int id, - const std::multimap& loc) -{ - ldout(cct, 5) << __func__ << " " << id << " " << loc << dendl; - if (!item_exists(id)) - return -ENOENT; - map id_loc = get_full_location(id); - ldout(cct, 20) << " id is at " << id_loc << dendl; - - for (map::const_iterator p = type_map.begin(); - p != type_map.end(); - ++p) { - map::iterator ip = id_loc.find(p->second); - if (ip == id_loc.end()) - continue; - for (std::multimap::const_iterator q = loc.find(p->second); - q != loc.end(); - ++q) { - if (q->first != p->second) - break; - if (q->second == ip->second) - return p->first; - } - } - return -ERANGE; -} - -int CrushWrapper::parse_loc_map(const std::vector& args, - std::map *ploc) -{ - ploc->clear(); - for (unsigned i = 0; i < args.size(); ++i) { - const char *s = args[i].c_str(); - const char *pos = strchr(s, '='); - if (!pos) - return -EINVAL; - string key(s, 0, pos-s); - string value(pos+1); - if (value.length()) - (*ploc)[key] = value; - else - return -EINVAL; - } - return 0; -} - -int CrushWrapper::parse_loc_multimap(const std::vector& args, - std::multimap *ploc) -{ - ploc->clear(); - for (unsigned i = 0; i < args.size(); ++i) { - const char *s = args[i].c_str(); - const char *pos = strchr(s, '='); - if (!pos) - return -EINVAL; - string key(s, 0, pos-s); - string value(pos+1); - if (value.length()) - ploc->insert(make_pair(key, value)); - else - return -EINVAL; - } - return 0; -} - -bool CrushWrapper::check_item_loc(CephContext *cct, int item, const map& loc, - int *weight) -{ - ldout(cct, 5) << "check_item_loc item " << item << " loc " << loc << dendl; - - for (map::const_iterator p = type_map.begin(); p != type_map.end(); ++p) { - // ignore device - if (p->first == 0) - continue; - - // ignore types that aren't specified in loc - map::const_iterator q = loc.find(p->second); - if (q == loc.end()) { - ldout(cct, 2) << "warning: did not specify location for '" << p->second << "' level (levels are " - << type_map << ")" << dendl; - continue; - } - - if (!name_exists(q->second)) { - ldout(cct, 5) << "check_item_loc bucket " << q->second << " dne" << dendl; - return false; - } - - int id = get_item_id(q->second); - if (id >= 0) { - ldout(cct, 5) << "check_item_loc requested " << q->second << " for type " << p->second - << " is a device, not bucket" << dendl; - return false; - } - - assert(bucket_exists(id)); - crush_bucket *b = get_bucket(id); - - // see if item exists in this bucket - for (unsigned j=0; jsize; j++) { - if (b->items[j] == item) { - ldout(cct, 2) << "check_item_loc " << item << " exists in bucket " << b->id << dendl; - if (weight) - *weight = crush_get_bucket_item_weight(b, j); - return true; - } - } - return false; - } - - ldout(cct, 1) << "check_item_loc item " << item << " loc " << loc << dendl; - return false; -} - -map CrushWrapper::get_full_location(int id) -{ - vector > full_location_ordered; - map full_location; - - get_full_location_ordered(id, full_location_ordered); - - std::copy(full_location_ordered.begin(), - full_location_ordered.end(), - std::inserter(full_location, full_location.begin())); - - return full_location; -} - -int CrushWrapper::get_full_location_ordered(int id, vector >& path) -{ - if (!item_exists(id)) - return -ENOENT; - int cur = id; - int ret; - while (true) { - pair parent_coord = get_immediate_parent(cur, &ret); - if (ret != 0) - break; - path.push_back(parent_coord); - cur = get_item_id(parent_coord.second); - } - return 0; -} - -string CrushWrapper::get_full_location_ordered_string(int id) -{ - vector > full_location_ordered; - string full_location; - get_full_location_ordered(id, full_location_ordered); - reverse(begin(full_location_ordered), end(full_location_ordered)); - for(auto i = full_location_ordered.begin(); i != full_location_ordered.end(); i++) { - full_location = full_location + i->first + "=" + i->second; - if (i != full_location_ordered.end() - 1) { - full_location = full_location + ","; - } - } - return full_location; -} - -map CrushWrapper::get_parent_hierarchy(int id) -{ - map parent_hierarchy; - pair parent_coord = get_immediate_parent(id); - int parent_id; - - // get the integer type for id and create a counter from there - int type_counter = get_bucket_type(id); - - // if we get a negative type then we can assume that we have an OSD - // change behavior in get_item_type FIXME - if (type_counter < 0) - type_counter = 0; - - // read the type map and get the name of the type with the largest ID - int high_type = 0; - for (map::iterator it = type_map.begin(); it != type_map.end(); ++it){ - if ( (*it).first > high_type ) - high_type = (*it).first; - } - - parent_id = get_item_id(parent_coord.second); - - while (type_counter < high_type) { - type_counter++; - parent_hierarchy[ type_counter ] = parent_coord.first; - - if (type_counter < high_type){ - // get the coordinate information for the next parent - parent_coord = get_immediate_parent(parent_id); - parent_id = get_item_id(parent_coord.second); - } - } - - return parent_hierarchy; -} - -int CrushWrapper::get_children(int id, list *children) -{ - // leaf? - if (id >= 0) { - return 0; - } - - crush_bucket *b = get_bucket(id); - if (IS_ERR(b)) { - return -ENOENT; - } - - for (unsigned n=0; nsize; n++) { - children->push_back(b->items[n]); - } - return b->size; -} - -int CrushWrapper::_get_leaves(int id, list *leaves) -{ - assert(leaves); - - // Already leaf? - if (id >= 0) { - leaves->push_back(id); - return 0; - } - - crush_bucket *b = get_bucket(id); - if (IS_ERR(b)) { - return -ENOENT; - } - - for (unsigned n = 0; n < b->size; n++) { - if (b->items[n] >= 0) { - leaves->push_back(b->items[n]); - } else { - // is a bucket, do recursive call - int r = _get_leaves(b->items[n], leaves); - if (r < 0) { - return r; - } - } - } - - return 0; // all is well -} - -int CrushWrapper::get_leaves(const string &name, set *leaves) -{ - assert(leaves); - leaves->clear(); - - if (!name_exists(name)) { - return -ENOENT; - } - - int id = get_item_id(name); - if (id >= 0) { - // already leaf - leaves->insert(id); - return 0; - } - - list unordered; - int r = _get_leaves(id, &unordered); - if (r < 0) { - return r; - } - - for (auto &p : unordered) { - leaves->insert(p); - } - - return 0; -} - -int CrushWrapper::insert_item( - CephContext *cct, int item, float weight, string name, - const map& loc) // typename -> bucketname -{ - ldout(cct, 5) << "insert_item item " << item << " weight " << weight - << " name " << name << " loc " << loc << dendl; - - if (!is_valid_crush_name(name)) - return -EINVAL; - - if (!is_valid_crush_loc(cct, loc)) - return -EINVAL; - - int r = validate_weightf(weight); - if (r < 0) { - return r; - } - - if (name_exists(name)) { - if (get_item_id(name) != item) { - ldout(cct, 10) << "device name '" << name << "' already exists as id " - << get_item_id(name) << dendl; - return -EEXIST; - } - } else { - set_item_name(item, name); - } - - int cur = item; - - // create locations if locations don't exist and add child in - // location with 0 weight the more detail in the insert_item method - // declaration in CrushWrapper.h - for (auto p = type_map.begin(); p != type_map.end(); ++p) { - // ignore device type - if (p->first == 0) - continue; - - // skip types that are unspecified - map::const_iterator q = loc.find(p->second); - if (q == loc.end()) { - ldout(cct, 2) << "warning: did not specify location for '" - << p->second << "' level (levels are " - << type_map << ")" << dendl; - continue; - } - - if (!name_exists(q->second)) { - ldout(cct, 5) << "insert_item creating bucket " << q->second << dendl; - int empty = 0, newid; - int r = add_bucket(0, 0, - CRUSH_HASH_DEFAULT, p->first, 1, &cur, &empty, &newid); - if (r < 0) { - ldout(cct, 1) << "add_bucket failure error: " << cpp_strerror(r) - << dendl; - return r; - } - set_item_name(newid, q->second); - - cur = newid; - continue; - } - - // add to an existing bucket - int id = get_item_id(q->second); - if (!bucket_exists(id)) { - ldout(cct, 1) << "insert_item doesn't have bucket " << id << dendl; - return -EINVAL; - } - - // check that we aren't creating a cycle. - if (subtree_contains(id, cur)) { - ldout(cct, 1) << "insert_item item " << cur << " already exists beneath " - << id << dendl; - return -EINVAL; - } - - // we have done sanity check above - crush_bucket *b = get_bucket(id); - - if (p->first != b->type) { - ldout(cct, 1) << "insert_item existing bucket has type " - << "'" << type_map[b->type] << "' != " - << "'" << type_map[p->first] << "'" << dendl; - return -EINVAL; - } - - // are we forming a loop? - if (subtree_contains(cur, b->id)) { - ldout(cct, 1) << "insert_item " << cur << " already contains " << b->id - << "; cannot form loop" << dendl; - return -ELOOP; - } - - ldout(cct, 5) << "insert_item adding " << cur << " weight " << weight - << " to bucket " << id << dendl; - int r = bucket_add_item(b, cur, 0); - assert (!r); - break; - } - - // adjust the item's weight in location - if (adjust_item_weightf_in_loc(cct, item, weight, loc) > 0) { - if (item >= crush->max_devices) { - crush->max_devices = item + 1; - ldout(cct, 5) << "insert_item max_devices now " << crush->max_devices - << dendl; - } - r = rebuild_roots_with_classes(); - if (r < 0) { - ldout(cct, 0) << __func__ << " unable to rebuild roots with classes: " - << cpp_strerror(r) << dendl; - return r; - } - return 0; - } - - ldout(cct, 1) << "error: didn't find anywhere to add item " << item - << " in " << loc << dendl; - return -EINVAL; -} - - -int CrushWrapper::move_bucket( - CephContext *cct, int id, const map& loc) -{ - // sorry this only works for buckets - if (id >= 0) - return -EINVAL; - - if (!item_exists(id)) - return -ENOENT; - - // get the name of the bucket we are trying to move for later - string id_name = get_item_name(id); - - // detach the bucket - int bucket_weight = detach_bucket(cct, id); - - // insert the bucket back into the hierarchy - return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc); -} - -int CrushWrapper::detach_bucket(CephContext *cct, int item) -{ - if (!crush) - return (-EINVAL); - - if (item >= 0) - return (-EINVAL); - - // check that the bucket that we want to detach exists - assert(bucket_exists(item)); - - // get the bucket's weight - crush_bucket *b = get_bucket(item); - unsigned bucket_weight = b->weight; - - // get where the bucket is located - pair bucket_location = get_immediate_parent(item); - - // get the id of the parent bucket - int parent_id = get_item_id(bucket_location.second); - - // get the parent bucket - crush_bucket *parent_bucket = get_bucket(parent_id); - - if (!IS_ERR(parent_bucket)) { - // zero out the bucket weight - bucket_adjust_item_weight(cct, parent_bucket, item, 0); - adjust_item_weight(cct, parent_bucket->id, parent_bucket->weight); - for (auto& p : choose_args) { - // weight down each weight-set to 0 before we remove the item - vector weightv(get_choose_args_positions(p.second), 0); - choose_args_adjust_item_weight(cct, p.second, item, weightv, nullptr); - } - - // remove the bucket from the parent - bucket_remove_item(parent_bucket, item); - } else if (PTR_ERR(parent_bucket) != -ENOENT) { - return PTR_ERR(parent_bucket); - } - - // check that we're happy - int test_weight = 0; - map test_location; - test_location[ bucket_location.first ] = (bucket_location.second); - - bool successful_detach = !(check_item_loc(cct, item, test_location, - &test_weight)); - assert(successful_detach); - assert(test_weight == 0); - - return bucket_weight; -} - -int CrushWrapper::swap_bucket(CephContext *cct, int src, int dst) -{ - if (src >= 0 || dst >= 0) - return -EINVAL; - if (!item_exists(src) || !item_exists(dst)) - return -EINVAL; - crush_bucket *a = get_bucket(src); - crush_bucket *b = get_bucket(dst); - unsigned aw = a->weight; - unsigned bw = b->weight; - - // swap weights - adjust_item_weight(cct, a->id, bw); - adjust_item_weight(cct, b->id, aw); - - // swap items - map tmp; - unsigned as = a->size; - unsigned bs = b->size; - for (unsigned i = 0; i < as; ++i) { - int item = a->items[0]; - int itemw = crush_get_bucket_item_weight(a, 0); - tmp[item] = itemw; - bucket_remove_item(a, item); - } - assert(a->size == 0); - assert(b->size == bs); - for (unsigned i = 0; i < bs; ++i) { - int item = b->items[0]; - int itemw = crush_get_bucket_item_weight(b, 0); - bucket_remove_item(b, item); - bucket_add_item(a, item, itemw); - } - assert(a->size == bs); - assert(b->size == 0); - for (auto t : tmp) { - bucket_add_item(b, t.first, t.second); - } - assert(a->size == bs); - assert(b->size == as); - - // swap names - swap_names(src, dst); - return rebuild_roots_with_classes(); -} - -int CrushWrapper::link_bucket( - CephContext *cct, int id, const map& loc) -{ - // sorry this only works for buckets - if (id >= 0) - return -EINVAL; - - if (!item_exists(id)) - return -ENOENT; - - // get the name of the bucket we are trying to move for later - string id_name = get_item_name(id); - - crush_bucket *b = get_bucket(id); - unsigned bucket_weight = b->weight; - - return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc); -} - -int CrushWrapper::create_or_move_item( - CephContext *cct, int item, float weight, string name, - const map& loc) // typename -> bucketname -{ - int ret = 0; - int old_iweight; - - if (!is_valid_crush_name(name)) - return -EINVAL; - - if (check_item_loc(cct, item, loc, &old_iweight)) { - ldout(cct, 5) << "create_or_move_item " << item << " already at " << loc - << dendl; - } else { - if (_search_item_exists(item)) { - weight = get_item_weightf(item); - ldout(cct, 10) << "create_or_move_item " << item - << " exists with weight " << weight << dendl; - remove_item(cct, item, true); - } - ldout(cct, 5) << "create_or_move_item adding " << item - << " weight " << weight - << " at " << loc << dendl; - ret = insert_item(cct, item, weight, name, loc); - if (ret == 0) - ret = 1; // changed - } - return ret; -} - -int CrushWrapper::update_item( - CephContext *cct, int item, float weight, string name, - const map& loc) // typename -> bucketname -{ - ldout(cct, 5) << "update_item item " << item << " weight " << weight - << " name " << name << " loc " << loc << dendl; - int ret = 0; - - if (!is_valid_crush_name(name)) - return -EINVAL; - - if (!is_valid_crush_loc(cct, loc)) - return -EINVAL; - - ret = validate_weightf(weight); - if (ret < 0) { - return ret; - } - - // compare quantized (fixed-point integer) weights! - int iweight = (int)(weight * (float)0x10000); - int old_iweight; - if (check_item_loc(cct, item, loc, &old_iweight)) { - ldout(cct, 5) << "update_item " << item << " already at " << loc << dendl; - if (old_iweight != iweight) { - ldout(cct, 5) << "update_item " << item << " adjusting weight " - << ((float)old_iweight/(float)0x10000) << " -> " << weight - << dendl; - adjust_item_weight_in_loc(cct, item, iweight, loc); - ret = 1; - } - if (get_item_name(item) != name) { - ldout(cct, 5) << "update_item setting " << item << " name to " << name - << dendl; - set_item_name(item, name); - ret = 1; - } - } else { - if (item_exists(item)) { - remove_item(cct, item, true); - } - ldout(cct, 5) << "update_item adding " << item << " weight " << weight - << " at " << loc << dendl; - ret = insert_item(cct, item, weight, name, loc); - if (ret == 0) - ret = 1; // changed - } - return ret; -} - -int CrushWrapper::get_item_weight(int id) const -{ - for (int bidx = 0; bidx < crush->max_buckets; bidx++) { - crush_bucket *b = crush->buckets[bidx]; - if (b == NULL) - continue; - if (b->id == id) - return b->weight; - for (unsigned i = 0; i < b->size; i++) - if (b->items[i] == id) - return crush_get_bucket_item_weight(b, i); - } - return -ENOENT; -} - -int CrushWrapper::get_item_weight_in_loc(int id, const map &loc) -{ - for (map::const_iterator l = loc.begin(); l != loc.end(); ++l) { - - int bid = get_item_id(l->second); - if (!bucket_exists(bid)) - continue; - crush_bucket *b = get_bucket(bid); - for (unsigned int i = 0; i < b->size; i++) { - if (b->items[i] == id) { - return crush_get_bucket_item_weight(b, i); - } - } - } - return -ENOENT; -} - -int CrushWrapper::adjust_item_weight(CephContext *cct, int id, int weight) -{ - ldout(cct, 5) << "adjust_item_weight " << id << " weight " << weight << dendl; - int changed = 0; - for (int bidx = 0; bidx < crush->max_buckets; bidx++) { - crush_bucket *b = crush->buckets[bidx]; - if (b == 0) - continue; - for (unsigned i = 0; i < b->size; i++) { - if (b->items[i] == id) { - int diff = bucket_adjust_item_weight(cct, b, id, weight); - ldout(cct, 5) << "adjust_item_weight " << id << " diff " << diff - << " in bucket " << bidx << dendl; - adjust_item_weight(cct, -1 - bidx, b->weight); - changed++; - } - } - } - if (!changed) - return -ENOENT; - return changed; -} - -int CrushWrapper::adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map& loc) -{ - ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " weight " << weight - << " in " << loc << dendl; - int changed = 0; - - for (auto l = loc.begin(); l != loc.end(); ++l) { - int bid = get_item_id(l->second); - if (!bucket_exists(bid)) - continue; - crush_bucket *b = get_bucket(bid); - for (unsigned int i = 0; i < b->size; i++) { - if (b->items[i] == id) { - int diff = bucket_adjust_item_weight(cct, b, id, weight); - ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " diff " << diff - << " in bucket " << bid << dendl; - adjust_item_weight(cct, bid, b->weight); - changed++; - } - } - } - if (!changed) - return -ENOENT; - return changed; -} - -int CrushWrapper::adjust_subtree_weight(CephContext *cct, int id, int weight) -{ - ldout(cct, 5) << __func__ << " " << id << " weight " << weight << dendl; - crush_bucket *b = get_bucket(id); - if (IS_ERR(b)) - return PTR_ERR(b); - int changed = 0; - list q; - q.push_back(b); - while (!q.empty()) { - b = q.front(); - q.pop_front(); - int local_changed = 0; - for (unsigned i=0; isize; ++i) { - int n = b->items[i]; - if (n >= 0) { - bucket_adjust_item_weight(cct, b, n, weight); - ++changed; - ++local_changed; - } else { - crush_bucket *sub = get_bucket(n); - if (IS_ERR(sub)) - continue; - q.push_back(sub); - } - } - if (local_changed) { - adjust_item_weight(cct, b->id, b->weight); - } - } - return changed; -} - -bool CrushWrapper::check_item_present(int id) const -{ - bool found = false; - - for (int bidx = 0; bidx < crush->max_buckets; bidx++) { - crush_bucket *b = crush->buckets[bidx]; - if (b == 0) - continue; - for (unsigned i = 0; i < b->size; i++) - if (b->items[i] == id) - found = true; - } - return found; -} - - -pair CrushWrapper::get_immediate_parent(int id, int *_ret) -{ - - for (int bidx = 0; bidx < crush->max_buckets; bidx++) { - crush_bucket *b = crush->buckets[bidx]; - if (b == 0) - continue; - if (is_shadow_item(b->id)) - continue; - for (unsigned i = 0; i < b->size; i++) - if (b->items[i] == id) { - string parent_id = name_map[b->id]; - string parent_bucket_type = type_map[b->type]; - if (_ret) - *_ret = 0; - return make_pair(parent_bucket_type, parent_id); - } - } - - if (_ret) - *_ret = -ENOENT; - - return pair(); -} - -int CrushWrapper::get_immediate_parent_id(int id, int *parent) const -{ - for (int bidx = 0; bidx < crush->max_buckets; bidx++) { - crush_bucket *b = crush->buckets[bidx]; - if (b == 0) - continue; - if (is_shadow_item(b->id)) - continue; - for (unsigned i = 0; i < b->size; i++) { - if (b->items[i] == id) { - *parent = b->id; - return 0; - } - } - } - return -ENOENT; -} - -int CrushWrapper::get_parent_of_type(int item, int type) const -{ - do { - int r = get_immediate_parent_id(item, &item); - if (r < 0) { - return 0; - } - } while (get_bucket_type(item) != type); - return item; -} - -int CrushWrapper::rename_class(const string& srcname, const string& dstname) -{ - auto i = class_rname.find(srcname); - if (i == class_rname.end()) - return -ENOENT; - auto j = class_rname.find(dstname); - if (j != class_rname.end()) - return -EEXIST; - - int class_id = i->second; - assert(class_name.count(class_id)); - // rename any shadow buckets of old class name - for (auto &it: class_map) { - if (it.first < 0 && it.second == class_id) { - string old_name = get_item_name(it.first); - size_t pos = old_name.find("~"); - assert(pos != string::npos); - string name_no_class = old_name.substr(0, pos); - string old_class_name = old_name.substr(pos + 1); - assert(old_class_name == srcname); - string new_name = name_no_class + "~" + dstname; - // we do not use set_item_name - // because the name is intentionally invalid - name_map[it.first] = new_name; - have_rmaps = false; - } - } - - // rename class - class_rname.erase(srcname); - class_name.erase(class_id); - class_rname[dstname] = class_id; - class_name[class_id] = dstname; - return 0; -} - -int CrushWrapper::populate_classes( - const std::map>& old_class_bucket) -{ - // build set of previous used shadow ids - set used_ids; - for (auto& p : old_class_bucket) { - for (auto& q : p.second) { - used_ids.insert(q.second); - } - } - // accumulate weight values for each carg and bucket as we go. because it is - // depth first, we will have the nested bucket weights we need when we - // finish constructing the containing buckets. - map>> cmap_item_weight; // cargs -> bno -> weights - set roots; - find_nonshadow_roots(&roots); - for (auto &r : roots) { - if (r >= 0) - continue; - for (auto &c : class_name) { - int clone; - int res = device_class_clone(r, c.first, old_class_bucket, used_ids, - &clone, &cmap_item_weight); - if (res < 0) - return res; - } - } - return 0; -} - -int CrushWrapper::trim_roots_with_class() -{ - set roots; - find_shadow_roots(&roots); - for (auto &r : roots) { - if (r >= 0) - continue; - int res = remove_root(r); - if (res) - return res; - } - // there is no need to reweight because we only remove from the - // root and down - return 0; -} - -int32_t CrushWrapper::_alloc_class_id() const { - if (class_name.empty()) { - return 0; - } - int32_t class_id = class_name.rbegin()->first + 1; - if (class_id >= 0) { - return class_id; - } - // wrapped, pick a random start and do exhaustive search - uint32_t upperlimit = numeric_limits::max(); - upperlimit++; - class_id = rand() % upperlimit; - const auto start = class_id; - do { - if (!class_name.count(class_id)) { - return class_id; - } else { - class_id++; - if (class_id < 0) { - class_id = 0; - } - } - } while (class_id != start); - assert(0 == "no available class id"); -} - -void CrushWrapper::reweight(CephContext *cct) -{ - set roots; - find_roots(&roots); - for (set::iterator p = roots.begin(); p != roots.end(); ++p) { - if (*p >= 0) - continue; - crush_bucket *b = get_bucket(*p); - ldout(cct, 5) << "reweight bucket " << *p << dendl; - int r = crush_reweight_bucket(crush, b); - assert(r == 0); - } -} - -int CrushWrapper::add_simple_rule_at( - string name, string root_name, - string failure_domain_name, - string device_class, - string mode, int rule_type, - int rno, - ostream *err) -{ - if (rule_exists(name)) { - if (err) - *err << "rule " << name << " exists"; - return -EEXIST; - } - if (rno >= 0) { - if (rule_exists(rno)) { - if (err) - *err << "rule with ruleno " << rno << " exists"; - return -EEXIST; - } - if (ruleset_exists(rno)) { - if (err) - *err << "ruleset " << rno << " exists"; - return -EEXIST; - } - } else { - for (rno = 0; rno < get_max_rules(); rno++) { - if (!rule_exists(rno) && !ruleset_exists(rno)) - break; - } - } - if (!name_exists(root_name)) { - if (err) - *err << "root item " << root_name << " does not exist"; - return -ENOENT; - } - int root = get_item_id(root_name); - int type = 0; - if (failure_domain_name.length()) { - type = get_type_id(failure_domain_name); - if (type < 0) { - if (err) - *err << "unknown type " << failure_domain_name; - return -EINVAL; - } - } - if (device_class.size()) { - if (!class_exists(device_class)) { - if (err) - *err << "device class " << device_class << " does not exist"; - return -EINVAL; - } - int c = get_class_id(device_class); - if (class_bucket.count(root) == 0 || - class_bucket[root].count(c) == 0) { - if (err) - *err << "root " << root_name << " has no devices with class " - << device_class; - return -EINVAL; - } - root = class_bucket[root][c]; - } - if (mode != "firstn" && mode != "indep") { - if (err) - *err << "unknown mode " << mode; - return -EINVAL; - } - - int steps = 3; - if (mode == "indep") - steps = 5; - int min_rep = mode == "firstn" ? 1 : 3; - int max_rep = mode == "firstn" ? 10 : 20; - //set the ruleset the same as rule_id(rno) - crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_rep, max_rep); - assert(rule); - int step = 0; - if (mode == "indep") { - crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); - crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); - } - crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0); - if (type) - crush_rule_set_step(rule, step++, - mode == "firstn" ? CRUSH_RULE_CHOOSELEAF_FIRSTN : - CRUSH_RULE_CHOOSELEAF_INDEP, - CRUSH_CHOOSE_N, - type); - else - crush_rule_set_step(rule, step++, - mode == "firstn" ? CRUSH_RULE_CHOOSE_FIRSTN : - CRUSH_RULE_CHOOSE_INDEP, - CRUSH_CHOOSE_N, - 0); - crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); - - int ret = crush_add_rule(crush, rule, rno); - if(ret < 0) { - *err << "failed to add rule " << rno << " because " << cpp_strerror(ret); - return ret; - } - set_rule_name(rno, name); - have_rmaps = false; - return rno; -} - -int CrushWrapper::add_simple_rule( - string name, string root_name, - string failure_domain_name, - string device_class, - string mode, int rule_type, - ostream *err) -{ - return add_simple_rule_at(name, root_name, failure_domain_name, device_class, - mode, - rule_type, -1, err); -} - -float CrushWrapper::_get_take_weight_osd_map(int root, - map *pmap) const -{ - float sum = 0.0; - list q; - q.push_back(root); - //breadth first iterate the OSD tree - while (!q.empty()) { - int bno = q.front(); - q.pop_front(); - crush_bucket *b = crush->buckets[-1-bno]; - assert(b); - for (unsigned j=0; jsize; ++j) { - int item_id = b->items[j]; - if (item_id >= 0) { //it's an OSD - float w = crush_get_bucket_item_weight(b, j); - (*pmap)[item_id] = w; - sum += w; - } else { //not an OSD, expand the child later - q.push_back(item_id); - } - } - } - return sum; -} - -void CrushWrapper::_normalize_weight_map(float sum, - const map& m, - map *pmap) const -{ - for (auto& p : m) { - map::iterator q = pmap->find(p.first); - if (q == pmap->end()) { - (*pmap)[p.first] = p.second / sum; - } else { - q->second += p.second / sum; - } - } -} - -int CrushWrapper::get_take_weight_osd_map(int root, map *pmap) const -{ - map m; - float sum = _get_take_weight_osd_map(root, &m); - _normalize_weight_map(sum, m, pmap); - return 0; -} - -int CrushWrapper::get_rule_weight_osd_map(unsigned ruleno, - map *pmap) const -{ - if (ruleno >= crush->max_rules) - return -ENOENT; - if (crush->rules[ruleno] == NULL) - return -ENOENT; - crush_rule *rule = crush->rules[ruleno]; - - // build a weight map for each TAKE in the rule, and then merge them - - // FIXME: if there are multiple takes that place a different number of - // objects we do not take that into account. (Also, note that doing this - // right is also a function of the pool, since the crush rule - // might choose 2 + choose 2 but pool size may only be 3.) - for (unsigned i=0; ilen; ++i) { - map m; - float sum = 0; - if (rule->steps[i].op == CRUSH_RULE_TAKE) { - int n = rule->steps[i].arg1; - if (n >= 0) { - m[n] = 1.0; - sum = 1.0; - } else { - sum += _get_take_weight_osd_map(n, &m); - } - } - _normalize_weight_map(sum, m, pmap); - } - - return 0; -} - -int CrushWrapper::remove_rule(int ruleno) -{ - if (ruleno >= (int)crush->max_rules) - return -ENOENT; - if (crush->rules[ruleno] == NULL) - return -ENOENT; - crush_destroy_rule(crush->rules[ruleno]); - crush->rules[ruleno] = NULL; - rule_name_map.erase(ruleno); - have_rmaps = false; - return rebuild_roots_with_classes(); -} - -int CrushWrapper::bucket_adjust_item_weight(CephContext *cct, crush_bucket *bucket, int item, int weight) -{ - if (cct->_conf->osd_crush_update_weight_set) { - unsigned position; - for (position = 0; position < bucket->size; position++) - if (bucket->items[position] == item) - break; - assert(position != bucket->size); - for (auto &w : choose_args) { - crush_choose_arg_map &arg_map = w.second; - crush_choose_arg *arg = &arg_map.args[-1-bucket->id]; - for (__u32 j = 0; j < arg->weight_set_size; j++) { - crush_weight_set *weight_set = &arg->weight_set[j]; - weight_set->weights[position] = weight; - } - } - } - return crush_bucket_adjust_item_weight(crush, bucket, item, weight); -} - -int CrushWrapper::add_bucket( - int bucketno, int alg, int hash, int type, int size, - int *items, int *weights, int *idout) -{ - if (alg == 0) { - alg = get_default_bucket_alg(); - if (alg == 0) - return -EINVAL; - } - crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items, - weights); - assert(b); - assert(idout); - int r = crush_add_bucket(crush, bucketno, b, idout); - int pos = -1 - *idout; - for (auto& p : choose_args) { - crush_choose_arg_map& cmap = p.second; - if (cmap.args) { - if ((int)cmap.size <= pos) { - cmap.args = (crush_choose_arg*)realloc( - cmap.args, - sizeof(crush_choose_arg) * (pos + 1)); - assert(cmap.args); - memset(&cmap.args[cmap.size], 0, - sizeof(crush_choose_arg) * (pos + 1 - cmap.size)); - cmap.size = pos + 1; - } - } else { - cmap.args = (crush_choose_arg*)calloc(sizeof(crush_choose_arg), - pos + 1); - assert(cmap.args); - cmap.size = pos + 1; - } - if (size > 0) { - int positions = get_choose_args_positions(cmap); - crush_choose_arg& carg = cmap.args[pos]; - carg.weight_set = (crush_weight_set*)calloc(sizeof(crush_weight_set), - size); - carg.weight_set_size = positions; - for (int ppos = 0; ppos < positions; ++ppos) { - carg.weight_set[ppos].weights = (__u32*)calloc(sizeof(__u32), size); - carg.weight_set[ppos].size = size; - for (int bpos = 0; bpos < size; ++bpos) { - carg.weight_set[ppos].weights[bpos] = weights[bpos]; - } - } - } - } - return r; -} - -int CrushWrapper::bucket_add_item(crush_bucket *bucket, int item, int weight) -{ - __u32 new_size = bucket->size + 1; - int r = crush_bucket_add_item(crush, bucket, item, weight); - if (r < 0) { - return r; - } - for (auto &w : choose_args) { - crush_choose_arg_map &arg_map = w.second; - crush_choose_arg *arg = &arg_map.args[-1-bucket->id]; - for (__u32 j = 0; j < arg->weight_set_size; j++) { - crush_weight_set *weight_set = &arg->weight_set[j]; - weight_set->weights = (__u32*)realloc(weight_set->weights, - new_size * sizeof(__u32)); - assert(weight_set->size + 1 == new_size); - weight_set->weights[weight_set->size] = weight; - weight_set->size = new_size; - } - if (arg->ids_size) { - arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32)); - assert(arg->ids_size + 1 == new_size); - arg->ids[arg->ids_size] = item; - arg->ids_size = new_size; - } - } - return 0; -} - -int CrushWrapper::bucket_remove_item(crush_bucket *bucket, int item) -{ - __u32 new_size = bucket->size - 1; - unsigned position; - for (position = 0; position < bucket->size; position++) - if (bucket->items[position] == item) - break; - assert(position != bucket->size); - int r = crush_bucket_remove_item(crush, bucket, item); - if (r < 0) { - return r; - } - for (auto &w : choose_args) { - crush_choose_arg_map &arg_map = w.second; - crush_choose_arg *arg = &arg_map.args[-1-bucket->id]; - for (__u32 j = 0; j < arg->weight_set_size; j++) { - crush_weight_set *weight_set = &arg->weight_set[j]; - assert(weight_set->size - 1 == new_size); - for (__u32 k = position; k < new_size; k++) - weight_set->weights[k] = weight_set->weights[k+1]; - if (new_size) { - weight_set->weights = (__u32*)realloc(weight_set->weights, - new_size * sizeof(__u32)); - } else { - weight_set->weights = NULL; - } - weight_set->size = new_size; - } - if (arg->ids_size) { - assert(arg->ids_size - 1 == new_size); - for (__u32 k = position; k < new_size; k++) - arg->ids[k] = arg->ids[k+1]; - if (new_size) { - arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32)); - } else { - arg->ids = NULL; - } - arg->ids_size = new_size; - } - } - return 0; -} - -int CrushWrapper::bucket_set_alg(int bid, int alg) -{ - crush_bucket *b = get_bucket(bid); - if (!b) { - return -ENOENT; - } - b->alg = alg; - return 0; -} - -int CrushWrapper::update_device_class(int id, - const string& class_name, - const string& name, - ostream *ss) -{ - assert(item_exists(id)); - auto old_class_name = get_item_class(id); - if (old_class_name && old_class_name != class_name) { - *ss << "osd." << id << " has already bound to class '" << old_class_name - << "', can not reset class to '" << class_name << "'; " - << "use 'ceph osd crush rm-device-class ' to " - << "remove old class first"; - return -EBUSY; - } - - int class_id = get_or_create_class_id(class_name); - if (id < 0) { - *ss << name << " id " << id << " is negative"; - return -EINVAL; - } - - if (class_map.count(id) != 0 && class_map[id] == class_id) { - *ss << name << " already set to class " << class_name; - return 0; - } - - set_item_class(id, class_id); - - int r = rebuild_roots_with_classes(); - if (r < 0) - return r; - return 1; -} - -int CrushWrapper::remove_device_class(CephContext *cct, int id, ostream *ss) -{ - assert(ss); - const char *name = get_item_name(id); - if (!name) { - *ss << "osd." << id << " does not have a name"; - return -ENOENT; - } - - const char *class_name = get_item_class(id); - if (!class_name) { - *ss << "osd." << id << " has not been bound to a specific class yet"; - return 0; - } - class_remove_item(id); - - int r = rebuild_roots_with_classes(); - if (r < 0) { - *ss << "unable to rebuild roots with class '" << class_name << "' " - << "of osd." << id << ": " << cpp_strerror(r); - return r; - } - return 0; -} - -int CrushWrapper::device_class_clone( - int original_id, int device_class, - const std::map>& old_class_bucket, - const std::set& used_ids, - int *clone, - map>> *cmap_item_weight) -{ - const char *item_name = get_item_name(original_id); - if (item_name == NULL) - return -ECHILD; - const char *class_name = get_class_name(device_class); - if (class_name == NULL) - return -EBADF; - string copy_name = item_name + string("~") + class_name; - if (name_exists(copy_name)) { - *clone = get_item_id(copy_name); - return 0; - } - - crush_bucket *original = get_bucket(original_id); - assert(!IS_ERR(original)); - crush_bucket *copy = crush_make_bucket(crush, - original->alg, - original->hash, - original->type, - 0, NULL, NULL); - assert(copy); - - vector item_orig_pos; // new item pos -> orig item pos - for (unsigned i = 0; i < original->size; i++) { - int item = original->items[i]; - int weight = crush_get_bucket_item_weight(original, i); - if (item >= 0) { - if (class_map.count(item) != 0 && class_map[item] == device_class) { - int res = crush_bucket_add_item(crush, copy, item, weight); - if (res) - return res; - } else { - continue; - } - } else { - int child_copy_id; - int res = device_class_clone(item, device_class, old_class_bucket, - used_ids, &child_copy_id, - cmap_item_weight); - if (res < 0) - return res; - crush_bucket *child_copy = get_bucket(child_copy_id); - assert(!IS_ERR(child_copy)); - res = crush_bucket_add_item(crush, copy, child_copy_id, - child_copy->weight); - if (res) - return res; - } - item_orig_pos.push_back(i); - } - assert(item_orig_pos.size() == copy->size); - - int bno = 0; - if (old_class_bucket.count(original_id) && - old_class_bucket.at(original_id).count(device_class)) { - bno = old_class_bucket.at(original_id).at(device_class); - } else { - // pick a new shadow bucket id that is not used by the current map - // *or* any previous shadow buckets. - bno = -1; - while (((-1-bno) < crush->max_buckets && crush->buckets[-1-bno]) || - used_ids.count(bno)) { - --bno; - } - } - int res = crush_add_bucket(crush, bno, copy, clone); - if (res) - return res; - assert(!bno || bno == *clone); - - res = set_item_class(*clone, device_class); - if (res < 0) - return res; - - // we do not use set_item_name because the name is intentionally invalid - name_map[*clone] = copy_name; - if (have_rmaps) - name_rmap[copy_name] = *clone; - class_bucket[original_id][device_class] = *clone; - - // set up choose_args for the new bucket. - for (auto& w : choose_args) { - crush_choose_arg_map& cmap = w.second; - if (-1-bno >= (int)cmap.size) { - unsigned new_size = -1-bno + 1; - cmap.args = (crush_choose_arg*)realloc(cmap.args, - new_size * sizeof(cmap.args[0])); - assert(cmap.args); - memset(cmap.args + cmap.size, 0, - (new_size - cmap.size) * sizeof(cmap.args[0])); - cmap.size = new_size; - } - auto& o = cmap.args[-1-original_id]; - auto& n = cmap.args[-1-bno]; - n.ids_size = 0; // FIXME: implement me someday - n.weight_set_size = o.weight_set_size; - n.weight_set = (crush_weight_set*)calloc( - n.weight_set_size, sizeof(crush_weight_set)); - for (size_t s = 0; s < n.weight_set_size; ++s) { - n.weight_set[s].size = copy->size; - n.weight_set[s].weights = (__u32*)calloc(copy->size, sizeof(__u32)); - } - for (size_t s = 0; s < n.weight_set_size; ++s) { - vector bucket_weights(n.weight_set_size); - for (size_t i = 0; i < copy->size; ++i) { - int item = copy->items[i]; - if (item >= 0) { - n.weight_set[s].weights[i] = o.weight_set[s].weights[item_orig_pos[i]]; - } else { - n.weight_set[s].weights[i] = (*cmap_item_weight)[w.first][item][s]; - } - bucket_weights[s] += n.weight_set[s].weights[i]; - } - (*cmap_item_weight)[w.first][bno] = bucket_weights; - } - } - return 0; -} - -int CrushWrapper::get_rules_by_class(const string &class_name, set *rules) -{ - assert(rules); - rules->clear(); - if (!class_exists(class_name)) { - return -ENOENT; - } - int class_id = get_class_id(class_name); - for (unsigned i = 0; i < crush->max_rules; ++i) { - crush_rule *r = crush->rules[i]; - if (!r) - continue; - for (unsigned j = 0; j < r->len; ++j) { - if (r->steps[j].op == CRUSH_RULE_TAKE) { - int step_item = r->steps[j].arg1; - int original_item; - int c; - int res = split_id_class(step_item, &original_item, &c); - if (res < 0) { - return res; - } - if (c != -1 && c == class_id) { - rules->insert(i); - break; - } - } - } - } - return 0; -} - -// return rules that might reference the given osd -int CrushWrapper::get_rules_by_osd(int osd, set *rules) -{ - assert(rules); - rules->clear(); - if (osd < 0) { - return -EINVAL; - } - for (unsigned i = 0; i < crush->max_rules; ++i) { - crush_rule *r = crush->rules[i]; - if (!r) - continue; - for (unsigned j = 0; j < r->len; ++j) { - if (r->steps[j].op == CRUSH_RULE_TAKE) { - int step_item = r->steps[j].arg1; - list unordered; - int rc = _get_leaves(step_item, &unordered); - if (rc < 0) { - return rc; // propagate fatal errors! - } - bool match = false; - for (auto &o: unordered) { - assert(o >= 0); - if (o == osd) { - match = true; - break; - } - } - if (match) { - rules->insert(i); - break; - } - } - } - } - return 0; -} - -bool CrushWrapper::_class_is_dead(int class_id) -{ - for (auto &p: class_map) { - if (p.first >= 0 && p.second == class_id) { - return false; - } - } - for (unsigned i = 0; i < crush->max_rules; ++i) { - crush_rule *r = crush->rules[i]; - if (!r) - continue; - for (unsigned j = 0; j < r->len; ++j) { - if (r->steps[j].op == CRUSH_RULE_TAKE) { - int root = r->steps[j].arg1; - for (auto &p : class_bucket) { - auto& q = p.second; - if (q.count(class_id) && q[class_id] == root) { - return false; - } - } - } - } - } - // no more referenced by any devices or crush rules - return true; -} - -void CrushWrapper::cleanup_dead_classes() -{ - auto p = class_name.begin(); - while (p != class_name.end()) { - if (_class_is_dead(p->first)) { - string n = p->second; - ++p; - remove_class_name(n); - } else { - ++p; - } - } -} - -int CrushWrapper::rebuild_roots_with_classes() -{ - std::map > old_class_bucket = class_bucket; - cleanup_dead_classes(); - int r = trim_roots_with_class(); - if (r < 0) - return r; - class_bucket.clear(); - return populate_classes(old_class_bucket); -} - -void CrushWrapper::encode(bufferlist& bl, uint64_t features) const -{ - assert(crush); - - __u32 magic = CRUSH_MAGIC; - ::encode(magic, bl); - - ::encode(crush->max_buckets, bl); - ::encode(crush->max_rules, bl); - ::encode(crush->max_devices, bl); - - bool encode_compat_choose_args = false; - crush_choose_arg_map arg_map; - memset(&arg_map, '\0', sizeof(arg_map)); - if (has_choose_args() && - !HAVE_FEATURE(features, CRUSH_CHOOSE_ARGS)) { - assert(!has_incompat_choose_args()); - encode_compat_choose_args = true; - arg_map = choose_args.begin()->second; - } - - // buckets - for (int i=0; imax_buckets; i++) { - __u32 alg = 0; - if (crush->buckets[i]) alg = crush->buckets[i]->alg; - ::encode(alg, bl); - if (!alg) - continue; - - ::encode(crush->buckets[i]->id, bl); - ::encode(crush->buckets[i]->type, bl); - ::encode(crush->buckets[i]->alg, bl); - ::encode(crush->buckets[i]->hash, bl); - ::encode(crush->buckets[i]->weight, bl); - ::encode(crush->buckets[i]->size, bl); - for (unsigned j=0; jbuckets[i]->size; j++) - ::encode(crush->buckets[i]->items[j], bl); - - switch (crush->buckets[i]->alg) { - case CRUSH_BUCKET_UNIFORM: - ::encode((reinterpret_cast(crush->buckets[i]))->item_weight, bl); - break; - - case CRUSH_BUCKET_LIST: - for (unsigned j=0; jbuckets[i]->size; j++) { - ::encode((reinterpret_cast(crush->buckets[i]))->item_weights[j], bl); - ::encode((reinterpret_cast(crush->buckets[i]))->sum_weights[j], bl); - } - break; - - case CRUSH_BUCKET_TREE: - ::encode((reinterpret_cast(crush->buckets[i]))->num_nodes, bl); - for (unsigned j=0; j<(reinterpret_cast(crush->buckets[i]))->num_nodes; j++) - ::encode((reinterpret_cast(crush->buckets[i]))->node_weights[j], bl); - break; - - case CRUSH_BUCKET_STRAW: - for (unsigned j=0; jbuckets[i]->size; j++) { - ::encode((reinterpret_cast(crush->buckets[i]))->item_weights[j], bl); - ::encode((reinterpret_cast(crush->buckets[i]))->straws[j], bl); - } - break; - - case CRUSH_BUCKET_STRAW2: - { - __u32 *weights; - if (encode_compat_choose_args && - arg_map.args[i].weight_set_size > 0) { - weights = arg_map.args[i].weight_set[0].weights; - } else { - weights = (reinterpret_cast(crush->buckets[i]))->item_weights; - } - for (unsigned j=0; jbuckets[i]->size; j++) { - ::encode(weights[j], bl); - } - } - break; - - default: - ceph_abort(); - break; - } - } - - // rules - for (unsigned i=0; imax_rules; i++) { - __u32 yes = crush->rules[i] ? 1:0; - ::encode(yes, bl); - if (!yes) - continue; - - ::encode(crush->rules[i]->len, bl); - ::encode(crush->rules[i]->mask, bl); - for (unsigned j=0; jrules[i]->len; j++) - ::encode(crush->rules[i]->steps[j], bl); - } - - // name info - ::encode(type_map, bl); - ::encode(name_map, bl); - ::encode(rule_name_map, bl); - - // tunables - ::encode(crush->choose_local_tries, bl); - ::encode(crush->choose_local_fallback_tries, bl); - ::encode(crush->choose_total_tries, bl); - ::encode(crush->chooseleaf_descend_once, bl); - ::encode(crush->chooseleaf_vary_r, bl); - ::encode(crush->straw_calc_version, bl); - ::encode(crush->allowed_bucket_algs, bl); - if (features & CEPH_FEATURE_CRUSH_TUNABLES5) { - ::encode(crush->chooseleaf_stable, bl); - } - - if (HAVE_FEATURE(features, SERVER_LUMINOUS)) { - // device classes - ::encode(class_map, bl); - ::encode(class_name, bl); - ::encode(class_bucket, bl); - - // choose args - __u32 size = (__u32)choose_args.size(); - ::encode(size, bl); - for (auto c : choose_args) { - ::encode(c.first, bl); - crush_choose_arg_map arg_map = c.second; - size = 0; - for (__u32 i = 0; i < arg_map.size; i++) { - crush_choose_arg *arg = &arg_map.args[i]; - if (arg->weight_set_size == 0 && - arg->ids_size == 0) - continue; - size++; - } - ::encode(size, bl); - for (__u32 i = 0; i < arg_map.size; i++) { - crush_choose_arg *arg = &arg_map.args[i]; - if (arg->weight_set_size == 0 && - arg->ids_size == 0) - continue; - ::encode(i, bl); - ::encode(arg->weight_set_size, bl); - for (__u32 j = 0; j < arg->weight_set_size; j++) { - crush_weight_set *weight_set = &arg->weight_set[j]; - ::encode(weight_set->size, bl); - for (__u32 k = 0; k < weight_set->size; k++) - ::encode(weight_set->weights[k], bl); - } - ::encode(arg->ids_size, bl); - for (__u32 j = 0; j < arg->ids_size; j++) - ::encode(arg->ids[j], bl); - } - } - } -} - -static void decode_32_or_64_string_map(map& m, bufferlist::iterator& blp) -{ - m.clear(); - __u32 n; - ::decode(n, blp); - while (n--) { - __s32 key; - ::decode(key, blp); - - __u32 strlen; - ::decode(strlen, blp); - if (strlen == 0) { - // der, key was actually 64-bits! - ::decode(strlen, blp); - } - ::decode_nohead(strlen, m[key], blp); - } -} - -void CrushWrapper::decode(bufferlist::iterator& blp) -{ - create(); - - __u32 magic; - ::decode(magic, blp); - if (magic != CRUSH_MAGIC) - throw buffer::malformed_input("bad magic number"); - - ::decode(crush->max_buckets, blp); - ::decode(crush->max_rules, blp); - ::decode(crush->max_devices, blp); - - // legacy tunables, unless we decode something newer - set_tunables_legacy(); - - try { - // buckets - crush->buckets = (crush_bucket**)calloc(1, crush->max_buckets * sizeof(crush_bucket*)); - for (int i=0; imax_buckets; i++) { - decode_crush_bucket(&crush->buckets[i], blp); - } - - // rules - crush->rules = (crush_rule**)calloc(1, crush->max_rules * sizeof(crush_rule*)); - for (unsigned i = 0; i < crush->max_rules; ++i) { - __u32 yes; - ::decode(yes, blp); - if (!yes) { - crush->rules[i] = NULL; - continue; - } - - __u32 len; - ::decode(len, blp); - crush->rules[i] = reinterpret_cast(calloc(1, crush_rule_size(len))); - crush->rules[i]->len = len; - ::decode(crush->rules[i]->mask, blp); - for (unsigned j=0; jrules[i]->len; j++) - ::decode(crush->rules[i]->steps[j], blp); - } - - // name info - // NOTE: we had a bug where we were incoding int instead of int32, which means the - // 'key' field for these maps may be either 32 or 64 bits, depending. tolerate - // both by assuming the string is always non-empty. - decode_32_or_64_string_map(type_map, blp); - decode_32_or_64_string_map(name_map, blp); - decode_32_or_64_string_map(rule_name_map, blp); - - // tunables - if (!blp.end()) { - ::decode(crush->choose_local_tries, blp); - ::decode(crush->choose_local_fallback_tries, blp); - ::decode(crush->choose_total_tries, blp); - } - if (!blp.end()) { - ::decode(crush->chooseleaf_descend_once, blp); - } - if (!blp.end()) { - ::decode(crush->chooseleaf_vary_r, blp); - } - if (!blp.end()) { - ::decode(crush->straw_calc_version, blp); - } - if (!blp.end()) { - ::decode(crush->allowed_bucket_algs, blp); - } - if (!blp.end()) { - ::decode(crush->chooseleaf_stable, blp); - } - if (!blp.end()) { - ::decode(class_map, blp); - ::decode(class_name, blp); - for (auto &c : class_name) - class_rname[c.second] = c.first; - ::decode(class_bucket, blp); - } - if (!blp.end()) { - __u32 choose_args_size; - ::decode(choose_args_size, blp); - for (__u32 i = 0; i < choose_args_size; i++) { - typename decltype(choose_args)::key_type choose_args_index; - ::decode(choose_args_index, blp); - crush_choose_arg_map arg_map; - arg_map.size = crush->max_buckets; - arg_map.args = (crush_choose_arg*)calloc( - arg_map.size, sizeof(crush_choose_arg)); - __u32 size; - ::decode(size, blp); - for (__u32 j = 0; j < size; j++) { - __u32 bucket_index; - ::decode(bucket_index, blp); - assert(bucket_index < arg_map.size); - crush_choose_arg *arg = &arg_map.args[bucket_index]; - ::decode(arg->weight_set_size, blp); - if (arg->weight_set_size) { - arg->weight_set = (crush_weight_set*)calloc( - arg->weight_set_size, sizeof(crush_weight_set)); - for (__u32 k = 0; k < arg->weight_set_size; k++) { - crush_weight_set *weight_set = &arg->weight_set[k]; - ::decode(weight_set->size, blp); - weight_set->weights = (__u32*)calloc( - weight_set->size, sizeof(__u32)); - for (__u32 l = 0; l < weight_set->size; l++) - ::decode(weight_set->weights[l], blp); - } - } - ::decode(arg->ids_size, blp); - if (arg->ids_size) { - assert(arg->ids_size == crush->buckets[bucket_index]->size); - arg->ids = (__s32 *)calloc(arg->ids_size, sizeof(__s32)); - for (__u32 k = 0; k < arg->ids_size; k++) - ::decode(arg->ids[k], blp); - } - } - choose_args[choose_args_index] = arg_map; - } - } - finalize(); - } - catch (...) { - crush_destroy(crush); - throw; - } -} - -void CrushWrapper::decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp) -{ - __u32 alg; - ::decode(alg, blp); - if (!alg) { - *bptr = NULL; - return; - } - - int size = 0; - switch (alg) { - case CRUSH_BUCKET_UNIFORM: - size = sizeof(crush_bucket_uniform); - break; - case CRUSH_BUCKET_LIST: - size = sizeof(crush_bucket_list); - break; - case CRUSH_BUCKET_TREE: - size = sizeof(crush_bucket_tree); - break; - case CRUSH_BUCKET_STRAW: - size = sizeof(crush_bucket_straw); - break; - case CRUSH_BUCKET_STRAW2: - size = sizeof(crush_bucket_straw2); - break; - default: - { - char str[128]; - snprintf(str, sizeof(str), "unsupported bucket algorithm: %d", alg); - throw buffer::malformed_input(str); - } - } - crush_bucket *bucket = reinterpret_cast(calloc(1, size)); - *bptr = bucket; - - ::decode(bucket->id, blp); - ::decode(bucket->type, blp); - ::decode(bucket->alg, blp); - ::decode(bucket->hash, blp); - ::decode(bucket->weight, blp); - ::decode(bucket->size, blp); - - bucket->items = (__s32*)calloc(1, bucket->size * sizeof(__s32)); - for (unsigned j = 0; j < bucket->size; ++j) { - ::decode(bucket->items[j], blp); - } - - switch (bucket->alg) { - case CRUSH_BUCKET_UNIFORM: - ::decode((reinterpret_cast(bucket))->item_weight, blp); - break; - - case CRUSH_BUCKET_LIST: { - crush_bucket_list* cbl = reinterpret_cast(bucket); - cbl->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32)); - cbl->sum_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32)); - - for (unsigned j = 0; j < bucket->size; ++j) { - ::decode(cbl->item_weights[j], blp); - ::decode(cbl->sum_weights[j], blp); - } - break; - } - - case CRUSH_BUCKET_TREE: { - crush_bucket_tree* cbt = reinterpret_cast(bucket); - ::decode(cbt->num_nodes, blp); - cbt->node_weights = (__u32*)calloc(1, cbt->num_nodes * sizeof(__u32)); - for (unsigned j=0; jnum_nodes; j++) { - ::decode(cbt->node_weights[j], blp); - } - break; - } - - case CRUSH_BUCKET_STRAW: { - crush_bucket_straw* cbs = reinterpret_cast(bucket); - cbs->straws = (__u32*)calloc(1, bucket->size * sizeof(__u32)); - cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32)); - for (unsigned j = 0; j < bucket->size; ++j) { - ::decode(cbs->item_weights[j], blp); - ::decode(cbs->straws[j], blp); - } - break; - } - - case CRUSH_BUCKET_STRAW2: { - crush_bucket_straw2* cbs = reinterpret_cast(bucket); - cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32)); - for (unsigned j = 0; j < bucket->size; ++j) { - ::decode(cbs->item_weights[j], blp); - } - break; - } - - default: - // We should have handled this case in the first switch statement - ceph_abort(); - break; - } -} - - -void CrushWrapper::dump(Formatter *f) const -{ - f->open_array_section("devices"); - for (int i=0; iopen_object_section("device"); - f->dump_int("id", i); - const char *n = get_item_name(i); - if (n) { - f->dump_string("name", n); - } else { - char name[20]; - sprintf(name, "device%d", i); - f->dump_string("name", name); - } - const char *device_class = get_item_class(i); - if (device_class != NULL) - f->dump_string("class", device_class); - f->close_section(); - } - f->close_section(); - - f->open_array_section("types"); - int n = get_num_type_names(); - for (int i=0; n; i++) { - const char *name = get_type_name(i); - if (!name) { - if (i == 0) { - f->open_object_section("type"); - f->dump_int("type_id", 0); - f->dump_string("name", "device"); - f->close_section(); - } - continue; - } - n--; - f->open_object_section("type"); - f->dump_int("type_id", i); - f->dump_string("name", name); - f->close_section(); - } - f->close_section(); - - f->open_array_section("buckets"); - for (int bucket = -1; bucket > -1-get_max_buckets(); --bucket) { - if (!bucket_exists(bucket)) - continue; - f->open_object_section("bucket"); - f->dump_int("id", bucket); - if (get_item_name(bucket)) - f->dump_string("name", get_item_name(bucket)); - f->dump_int("type_id", get_bucket_type(bucket)); - if (get_type_name(get_bucket_type(bucket))) - f->dump_string("type_name", get_type_name(get_bucket_type(bucket))); - f->dump_int("weight", get_bucket_weight(bucket)); - f->dump_string("alg", crush_bucket_alg_name(get_bucket_alg(bucket))); - f->dump_string("hash", crush_hash_name(get_bucket_hash(bucket))); - f->open_array_section("items"); - for (int j=0; jopen_object_section("item"); - f->dump_int("id", get_bucket_item(bucket, j)); - f->dump_int("weight", get_bucket_item_weight(bucket, j)); - f->dump_int("pos", j); - f->close_section(); - } - f->close_section(); - f->close_section(); - } - f->close_section(); - - f->open_array_section("rules"); - dump_rules(f); - f->close_section(); - - f->open_object_section("tunables"); - dump_tunables(f); - f->close_section(); - - dump_choose_args(f); -} - -namespace { - // depth first walker - class TreeDumper { - typedef CrushTreeDumper::Item Item; - const CrushWrapper *crush; - const CrushTreeDumper::name_map_t& weight_set_names; - public: - explicit TreeDumper(const CrushWrapper *crush, - const CrushTreeDumper::name_map_t& wsnames) - : crush(crush), weight_set_names(wsnames) {} - - void dump(Formatter *f) { - set roots; - crush->find_roots(&roots); - for (set::iterator root = roots.begin(); root != roots.end(); ++root) { - dump_item(Item(*root, 0, 0, crush->get_bucket_weightf(*root)), f); - } - } - - private: - void dump_item(const Item& qi, Formatter* f) { - if (qi.is_bucket()) { - f->open_object_section("bucket"); - CrushTreeDumper::dump_item_fields(crush, weight_set_names, qi, f); - dump_bucket_children(qi, f); - f->close_section(); - } else { - f->open_object_section("device"); - CrushTreeDumper::dump_item_fields(crush, weight_set_names, qi, f); - f->close_section(); - } - } - - void dump_bucket_children(const Item& parent, Formatter* f) { - f->open_array_section("items"); - const int max_pos = crush->get_bucket_size(parent.id); - for (int pos = 0; pos < max_pos; pos++) { - int id = crush->get_bucket_item(parent.id, pos); - float weight = crush->get_bucket_item_weightf(parent.id, pos); - dump_item(Item(id, parent.id, parent.depth + 1, weight), f); - } - f->close_section(); - } - }; -} - -void CrushWrapper::dump_tree( - Formatter *f, - const CrushTreeDumper::name_map_t& weight_set_names) const -{ - assert(f); - TreeDumper(this, weight_set_names).dump(f); -} - -void CrushWrapper::dump_tunables(Formatter *f) const -{ - f->dump_int("choose_local_tries", get_choose_local_tries()); - f->dump_int("choose_local_fallback_tries", get_choose_local_fallback_tries()); - f->dump_int("choose_total_tries", get_choose_total_tries()); - f->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once()); - f->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r()); - f->dump_int("chooseleaf_stable", get_chooseleaf_stable()); - f->dump_int("straw_calc_version", get_straw_calc_version()); - f->dump_int("allowed_bucket_algs", get_allowed_bucket_algs()); - - // be helpful about it - if (has_jewel_tunables()) - f->dump_string("profile", "jewel"); - else if (has_hammer_tunables()) - f->dump_string("profile", "hammer"); - else if (has_firefly_tunables()) - f->dump_string("profile", "firefly"); - else if (has_bobtail_tunables()) - f->dump_string("profile", "bobtail"); - else if (has_argonaut_tunables()) - f->dump_string("profile", "argonaut"); - else - f->dump_string("profile", "unknown"); - f->dump_int("optimal_tunables", (int)has_optimal_tunables()); - f->dump_int("legacy_tunables", (int)has_legacy_tunables()); - - // be helpful about minimum version required - f->dump_string("minimum_required_version", get_min_required_version()); - - f->dump_int("require_feature_tunables", (int)has_nondefault_tunables()); - f->dump_int("require_feature_tunables2", (int)has_nondefault_tunables2()); - f->dump_int("has_v2_rules", (int)has_v2_rules()); - f->dump_int("require_feature_tunables3", (int)has_nondefault_tunables3()); - f->dump_int("has_v3_rules", (int)has_v3_rules()); - f->dump_int("has_v4_buckets", (int)has_v4_buckets()); - f->dump_int("require_feature_tunables5", (int)has_nondefault_tunables5()); - f->dump_int("has_v5_rules", (int)has_v5_rules()); -} - -void CrushWrapper::dump_choose_args(Formatter *f) const -{ - f->open_object_section("choose_args"); - for (auto c : choose_args) { - crush_choose_arg_map arg_map = c.second; - f->open_array_section(stringify(c.first).c_str()); - for (__u32 i = 0; i < arg_map.size; i++) { - crush_choose_arg *arg = &arg_map.args[i]; - if (arg->weight_set_size == 0 && - arg->ids_size == 0) - continue; - f->open_object_section("choose_args"); - int bucket_index = i; - f->dump_int("bucket_id", -1-bucket_index); - if (arg->weight_set_size > 0) { - f->open_array_section("weight_set"); - for (__u32 j = 0; j < arg->weight_set_size; j++) { - f->open_array_section("weights"); - __u32 *weights = arg->weight_set[j].weights; - __u32 size = arg->weight_set[j].size; - for (__u32 k = 0; k < size; k++) { - f->dump_float("weight", (float)weights[k]/(float)0x10000); - } - f->close_section(); - } - f->close_section(); - } - if (arg->ids_size > 0) { - f->open_array_section("ids"); - for (__u32 j = 0; j < arg->ids_size; j++) - f->dump_int("id", arg->ids[j]); - f->close_section(); - } - f->close_section(); - } - f->close_section(); - } - f->close_section(); -} - -void CrushWrapper::dump_rules(Formatter *f) const -{ - for (int i=0; iopen_object_section("rule"); - f->dump_int("rule_id", ruleset); - if (get_rule_name(ruleset)) - f->dump_string("rule_name", get_rule_name(ruleset)); - f->dump_int("ruleset", get_rule_mask_ruleset(ruleset)); - f->dump_int("type", get_rule_mask_type(ruleset)); - f->dump_int("min_size", get_rule_mask_min_size(ruleset)); - f->dump_int("max_size", get_rule_mask_max_size(ruleset)); - f->open_array_section("steps"); - for (int j=0; jopen_object_section("step"); - switch (get_rule_op(ruleset, j)) { - case CRUSH_RULE_NOOP: - f->dump_string("op", "noop"); - break; - case CRUSH_RULE_TAKE: - f->dump_string("op", "take"); - { - int item = get_rule_arg1(ruleset, j); - f->dump_int("item", item); - - const char *name = get_item_name(item); - f->dump_string("item_name", name ? name : ""); - } - break; - case CRUSH_RULE_EMIT: - f->dump_string("op", "emit"); - break; - case CRUSH_RULE_CHOOSE_FIRSTN: - f->dump_string("op", "choose_firstn"); - f->dump_int("num", get_rule_arg1(ruleset, j)); - f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j))); - break; - case CRUSH_RULE_CHOOSE_INDEP: - f->dump_string("op", "choose_indep"); - f->dump_int("num", get_rule_arg1(ruleset, j)); - f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j))); - break; - case CRUSH_RULE_CHOOSELEAF_FIRSTN: - f->dump_string("op", "chooseleaf_firstn"); - f->dump_int("num", get_rule_arg1(ruleset, j)); - f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j))); - break; - case CRUSH_RULE_CHOOSELEAF_INDEP: - f->dump_string("op", "chooseleaf_indep"); - f->dump_int("num", get_rule_arg1(ruleset, j)); - f->dump_string("type", get_type_name(get_rule_arg2(ruleset, j))); - break; - case CRUSH_RULE_SET_CHOOSE_TRIES: - f->dump_string("op", "set_choose_tries"); - f->dump_int("num", get_rule_arg1(ruleset, j)); - break; - case CRUSH_RULE_SET_CHOOSELEAF_TRIES: - f->dump_string("op", "set_chooseleaf_tries"); - f->dump_int("num", get_rule_arg1(ruleset, j)); - break; - default: - f->dump_int("opcode", get_rule_op(ruleset, j)); - f->dump_int("arg1", get_rule_arg1(ruleset, j)); - f->dump_int("arg2", get_rule_arg2(ruleset, j)); - } - f->close_section(); - } - f->close_section(); - f->close_section(); -} - -void CrushWrapper::list_rules(Formatter *f) const -{ - for (int rule = 0; rule < get_max_rules(); rule++) { - if (!rule_exists(rule)) - continue; - f->dump_string("name", get_rule_name(rule)); - } -} - -void CrushWrapper::list_rules(ostream *ss) const -{ - for (int rule = 0; rule < get_max_rules(); rule++) { - if (!rule_exists(rule)) - continue; - *ss << get_rule_name(rule) << "\n"; - } -} - -class CrushTreePlainDumper : public CrushTreeDumper::Dumper { -public: - typedef CrushTreeDumper::Dumper Parent; - - explicit CrushTreePlainDumper(const CrushWrapper *crush, - const CrushTreeDumper::name_map_t& wsnames) - : Parent(crush, wsnames) {} - explicit CrushTreePlainDumper(const CrushWrapper *crush, - const CrushTreeDumper::name_map_t& wsnames, - bool show_shadow) - : Parent(crush, wsnames, show_shadow) {} - - - void dump(TextTable *tbl) { - tbl->define_column("ID", TextTable::LEFT, TextTable::RIGHT); - tbl->define_column("CLASS", TextTable::LEFT, TextTable::RIGHT); - tbl->define_column("WEIGHT", TextTable::LEFT, TextTable::RIGHT); - for (auto& p : crush->choose_args) { - if (p.first == CrushWrapper::DEFAULT_CHOOSE_ARGS) { - tbl->define_column("(compat)", TextTable::LEFT, TextTable::RIGHT); - } else { - string name; - auto q = weight_set_names.find(p.first); - name = q != weight_set_names.end() ? q->second : - stringify(p.first); - tbl->define_column(name.c_str(), TextTable::LEFT, TextTable::RIGHT); - } - } - tbl->define_column("TYPE NAME", TextTable::LEFT, TextTable::LEFT); - Parent::dump(tbl); - } - -protected: - void dump_item(const CrushTreeDumper::Item &qi, TextTable *tbl) override { - const char *c = crush->get_item_class(qi.id); - if (!c) - c = ""; - *tbl << qi.id - << c - << weightf_t(qi.weight); - for (auto& p : crush->choose_args) { - if (qi.parent < 0) { - const crush_choose_arg_map cmap = crush->choose_args_get(p.first); - int bidx = -1 - qi.parent; - const crush_bucket *b = crush->get_bucket(qi.parent); - if (b && - bidx < (int)cmap.size && - cmap.args[bidx].weight_set && - cmap.args[bidx].weight_set_size >= 1) { - int pos; - for (pos = 0; - pos < (int)cmap.args[bidx].weight_set[0].size && - b->items[pos] != qi.id; - ++pos) ; - *tbl << weightf_t((float)cmap.args[bidx].weight_set[0].weights[pos] / - (float)0x10000); - continue; - } - } - *tbl << ""; - } - ostringstream ss; - for (int k=0; k < qi.depth; k++) { - ss << " "; - } - if (qi.is_bucket()) { - ss << crush->get_type_name(crush->get_bucket_type(qi.id)) << " " - << crush->get_item_name(qi.id); - } else { - ss << "osd." << qi.id; - } - *tbl << ss.str(); - *tbl << TextTable::endrow; - } -}; - - -class CrushTreeFormattingDumper : public CrushTreeDumper::FormattingDumper { -public: - typedef CrushTreeDumper::FormattingDumper Parent; - - explicit CrushTreeFormattingDumper( - const CrushWrapper *crush, - const CrushTreeDumper::name_map_t& wsnames) - : Parent(crush, wsnames) {} - - explicit CrushTreeFormattingDumper( - const CrushWrapper *crush, - const CrushTreeDumper::name_map_t& wsnames, - bool show_shadow) - : Parent(crush, wsnames, show_shadow) {} - - void dump(Formatter *f) { - f->open_array_section("nodes"); - Parent::dump(f); - f->close_section(); - f->open_array_section("stray"); - f->close_section(); - } -}; - - -void CrushWrapper::dump_tree( - ostream *out, - Formatter *f, - const CrushTreeDumper::name_map_t& weight_set_names, - bool show_shadow) const -{ - if (out) { - TextTable tbl; - CrushTreePlainDumper(this, weight_set_names, show_shadow).dump(&tbl); - *out << tbl; - } - if (f) { - CrushTreeFormattingDumper(this, weight_set_names, show_shadow).dump(f); - } -} - -void CrushWrapper::generate_test_instances(list& o) -{ - o.push_back(new CrushWrapper); - // fixme -} - -/** - * Determine the default CRUSH ruleset ID to be used with - * newly created replicated pools. - * - * @returns a ruleset ID (>=0) or -1 if no suitable ruleset found - */ -int CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(CephContext *cct) -{ - int crush_ruleset = cct->_conf->osd_pool_default_crush_rule; - if (crush_ruleset < 0) { - crush_ruleset = find_first_ruleset(pg_pool_t::TYPE_REPLICATED); - } else if (!ruleset_exists(crush_ruleset)) { - crush_ruleset = -1; // match find_first_ruleset() retval - } - return crush_ruleset; -} - -bool CrushWrapper::is_valid_crush_name(const string& s) -{ - if (s.empty()) - return false; - for (string::const_iterator p = s.begin(); p != s.end(); ++p) { - if (!(*p == '-') && - !(*p == '_') && - !(*p == '.') && - !(*p >= '0' && *p <= '9') && - !(*p >= 'A' && *p <= 'Z') && - !(*p >= 'a' && *p <= 'z')) - return false; - } - return true; -} - -bool CrushWrapper::is_valid_crush_loc(CephContext *cct, - const map& loc) -{ - for (map::const_iterator l = loc.begin(); l != loc.end(); ++l) { - if (!is_valid_crush_name(l->first) || - !is_valid_crush_name(l->second)) { - ldout(cct, 1) << "loc[" - << l->first << "] = '" - << l->second << "' not a valid crush name ([A-Za-z0-9_-.]+)" - << dendl; - return false; - } - } - return true; -} - -int CrushWrapper::_choose_type_stack( - CephContext *cct, - const vector>& stack, - const set& overfull, - const vector& underfull, - const vector& orig, - vector::const_iterator& i, - set& used, - vector *pw) const -{ - vector w = *pw; - vector o; - - ldout(cct, 10) << __func__ << " stack " << stack - << " orig " << orig - << " at " << *i - << " pw " << *pw - << dendl; - - vector cumulative_fanout(stack.size()); - int f = 1; - for (int j = (int)stack.size() - 1; j >= 0; --j) { - cumulative_fanout[j] = f; - f *= stack[j].second; - } - ldout(cct, 10) << __func__ << " cumulative_fanout " << cumulative_fanout - << dendl; - - // identify underful targets for each intermediate level. - // this serves two purposes: - // 1. we can tell when we are selecting a bucket that does not have any underfull - // devices beneath it. that means that if the current input includes an overfull - // device, we won't be able to find an underfull device with this parent to - // swap for it. - // 2. when we decide we should reject a bucket due to the above, this list gives us - // a list of peers to consider that *do* have underfull devices available.. (we - // are careful to pick one that has the same parent.) - vector> underfull_buckets; // level -> set of buckets with >0 underfull item(s) - underfull_buckets.resize(stack.size() - 1); - for (auto osd : underfull) { - int item = osd; - for (int j = (int)stack.size() - 2; j >= 0; --j) { - int type = stack[j].first; - item = get_parent_of_type(item, type); - ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type - << " is " << item << dendl; - underfull_buckets[j].insert(item); - } - } - ldout(cct, 20) << __func__ << " underfull_buckets " << underfull_buckets << dendl; - - for (unsigned j = 0; j < stack.size(); ++j) { - int type = stack[j].first; - int fanout = stack[j].second; - int cum_fanout = cumulative_fanout[j]; - ldout(cct, 10) << " level " << j << ": type " << type << " fanout " << fanout - << " cumulative " << cum_fanout - << " w " << w << dendl; - vector o; - auto tmpi = i; - for (auto from : w) { - ldout(cct, 10) << " from " << from << dendl; - // identify leaves under each choice. we use this to check whether any of these - // leaves are overfull. (if so, we need to make sure there are underfull candidates - // to swap for them.) - vector> leaves; - leaves.resize(fanout); - for (int pos = 0; pos < fanout; ++pos) { - if (type > 0) { - // non-leaf - int item = get_parent_of_type(*tmpi, type); - o.push_back(item); - int n = cum_fanout; - while (n-- && tmpi != orig.end()) { - leaves[pos].insert(*tmpi++); - } - ldout(cct, 10) << __func__ << " from " << *tmpi << " got " << item - << " of type " << type << " over leaves " << leaves[pos] << dendl; - } else { - // leaf - bool replaced = false; - if (overfull.count(*i)) { - for (auto item : underfull) { - ldout(cct, 10) << __func__ << " pos " << pos - << " was " << *i << " considering " << item - << dendl; - if (used.count(item)) { - ldout(cct, 20) << __func__ << " in used " << used << dendl; - continue; - } - if (!subtree_contains(from, item)) { - ldout(cct, 20) << __func__ << " not in subtree " << from << dendl; - continue; - } - if (std::find(orig.begin(), orig.end(), item) != orig.end()) { - ldout(cct, 20) << __func__ << " in orig " << orig << dendl; - continue; - } - o.push_back(item); - used.insert(item); - ldout(cct, 10) << __func__ << " pos " << pos << " replace " - << *i << " -> " << item << dendl; - replaced = true; - ++i; - break; - } - } - if (!replaced) { - ldout(cct, 10) << __func__ << " pos " << pos << " keep " << *i - << dendl; - o.push_back(*i); - ++i; - } - if (i == orig.end()) { - ldout(cct, 10) << __func__ << " end of orig, break 1" << dendl; - break; - } - } - } - if (j + 1 < stack.size()) { - // check if any buckets have overfull leaves but no underfull candidates - for (int pos = 0; pos < fanout; ++pos) { - if (underfull_buckets[j].count(o[pos]) == 0) { - // are any leaves overfull? - bool any_overfull = false; - for (auto osd : leaves[pos]) { - if (overfull.count(osd)) { - any_overfull = true; - } - } - if (any_overfull) { - ldout(cct, 10) << " bucket " << o[pos] << " has no underfull targets and " - << ">0 leaves " << leaves[pos] << " is overfull; alts " - << underfull_buckets[j] - << dendl; - for (auto alt : underfull_buckets[j]) { - if (std::find(o.begin(), o.end(), alt) == o.end()) { - // see if alt has the same parent - if (j == 0 || - get_parent_of_type(o[pos], stack[j-1].first) == - get_parent_of_type(alt, stack[j-1].first)) { - if (j) - ldout(cct, 10) << " replacing " << o[pos] - << " (which has no underfull leaves) with " << alt - << " (same parent " - << get_parent_of_type(alt, stack[j-1].first) << " type " - << type << ")" << dendl; - else - ldout(cct, 10) << " replacing " << o[pos] - << " (which has no underfull leaves) with " << alt - << " (first level)" << dendl; - o[pos] = alt; - break; - } else { - ldout(cct, 30) << " alt " << alt << " for " << o[pos] - << " has different parent, skipping" << dendl; - } - } - } - } - } - } - } - if (i == orig.end()) { - ldout(cct, 10) << __func__ << " end of orig, break 2" << dendl; - break; - } - } - ldout(cct, 10) << __func__ << " w <- " << o << " was " << w << dendl; - w.swap(o); - } - *pw = w; - return 0; -} - -int CrushWrapper::try_remap_rule( - CephContext *cct, - int ruleno, - int maxout, - const set& overfull, - const vector& underfull, - const vector& orig, - vector *out) const -{ - const crush_map *map = crush; - const crush_rule *rule = get_rule(ruleno); - assert(rule); - - ldout(cct, 10) << __func__ << " ruleno " << ruleno - << " numrep " << maxout << " overfull " << overfull - << " underfull " << underfull << " orig " << orig - << dendl; - vector w; // working set - out->clear(); - - auto i = orig.begin(); - set used; - - vector> type_stack; // (type, fan-out) - - for (unsigned step = 0; step < rule->len; ++step) { - const crush_rule_step *curstep = &rule->steps[step]; - ldout(cct, 10) << __func__ << " step " << step << " w " << w << dendl; - switch (curstep->op) { - case CRUSH_RULE_TAKE: - if ((curstep->arg1 >= 0 && curstep->arg1 < map->max_devices) || - (-1-curstep->arg1 >= 0 && -1-curstep->arg1 < map->max_buckets && - map->buckets[-1-curstep->arg1])) { - w.clear(); - w.push_back(curstep->arg1); - ldout(cct, 10) << __func__ << " take " << w << dendl; - } else { - ldout(cct, 1) << " bad take value " << curstep->arg1 << dendl; - } - break; - - case CRUSH_RULE_CHOOSELEAF_FIRSTN: - case CRUSH_RULE_CHOOSELEAF_INDEP: - { - int numrep = curstep->arg1; - int type = curstep->arg2; - if (numrep <= 0) - numrep += maxout; - type_stack.push_back(make_pair(type, numrep)); - type_stack.push_back(make_pair(0, 1)); - int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig, - i, used, &w); - if (r < 0) - return r; - type_stack.clear(); - } - break; - - case CRUSH_RULE_CHOOSE_FIRSTN: - case CRUSH_RULE_CHOOSE_INDEP: - { - int numrep = curstep->arg1; - int type = curstep->arg2; - if (numrep <= 0) - numrep += maxout; - type_stack.push_back(make_pair(type, numrep)); - } - break; - - case CRUSH_RULE_EMIT: - ldout(cct, 10) << " emit " << w << dendl; - if (!type_stack.empty()) { - int r = _choose_type_stack(cct, type_stack, overfull, underfull, orig, - i, used, &w); - if (r < 0) - return r; - type_stack.clear(); - } - for (auto item : w) { - out->push_back(item); - } - w.clear(); - break; - - default: - // ignore - break; - } - } - - return 0; -} - - -int CrushWrapper::_choose_args_adjust_item_weight_in_bucket( - CephContext *cct, - crush_choose_arg_map cmap, - int bucketid, - int id, - const vector& weight, - ostream *ss) -{ - int changed = 0; - int bidx = -1 - bucketid; - crush_bucket *b = crush->buckets[bidx]; - if (bidx >= (int)cmap.size) { - if (ss) - *ss << "no weight-set for bucket " << b->id; - ldout(cct, 10) << __func__ << " no crush_choose_arg for bucket " << b->id - << dendl; - return 0; - } - crush_choose_arg *carg = &cmap.args[bidx]; - if (carg->weight_set == NULL) { - if (ss) - *ss << "no weight-set for bucket " << b->id; - ldout(cct, 10) << __func__ << " no weight_set for bucket " << b->id - << dendl; - return 0; - } - if (carg->weight_set_size != weight.size()) { - if (ss) - *ss << "weight_set_size != " << weight.size() << " for bucket " << b->id; - ldout(cct, 10) << __func__ << " weight_set_size != " << weight.size() - << " for bucket " << b->id << dendl; - return 0; - } - for (unsigned i = 0; i < b->size; i++) { - if (b->items[i] == id) { - for (unsigned j = 0; j < weight.size(); ++j) { - carg->weight_set[j].weights[i] = weight[j]; - } - ldout(cct, 5) << __func__ << " set " << id << " to " << weight - << " in bucket " << b->id << dendl; - changed++; - } - } - if (changed) { - vector bucket_weight(weight.size(), 0); - for (unsigned i = 0; i < b->size; i++) { - for (unsigned j = 0; j < weight.size(); ++j) { - bucket_weight[j] += carg->weight_set[j].weights[i]; - } - } - choose_args_adjust_item_weight(cct, cmap, b->id, bucket_weight, nullptr); - } - return changed; -} - -int CrushWrapper::choose_args_adjust_item_weight( - CephContext *cct, - crush_choose_arg_map cmap, - int id, - const vector& weight, - ostream *ss) -{ - ldout(cct, 5) << __func__ << " " << id << " weight " << weight << dendl; - int changed = 0; - for (int bidx = 0; bidx < crush->max_buckets; bidx++) { - crush_bucket *b = crush->buckets[bidx]; - if (b == nullptr) { - continue; - } - changed += _choose_args_adjust_item_weight_in_bucket( - cct, cmap, b->id, id, weight, ss); - } - if (!changed) { - if (ss) - *ss << "item " << id << " not found in crush map"; - return -ENOENT; - } - return changed; -}