1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
18 #include "common/ceph_argparse.h"
19 #include "common/errno.h"
20 #include "common/safe_io.h"
21 #include "mon/health_check.h"
23 #include "global/global_init.h"
24 #include "osd/OSDMap.h"
// Help text: a usage banner followed by one cout line per supported option.
// NOTE(review): the enclosing function header is not visible in this excerpt.
30 cout << " usage: [--print] [--createsimple <numosd> [--clobber] [--pg_bits <bitsperosd>]] <mapfilename>" << std::endl;
31 cout << " --export-crush <file> write osdmap's crush map to <file>" << std::endl;
32 cout << " --import-crush <file> replace osdmap's crush map with <file>" << std::endl;
// PG-mapping test modes; each accepts an optional --pool restriction.
33 cout << " --test-map-pgs [--pool <poolid>] [--pg_num <pg_num>] map all pgs" << std::endl;
34 cout << " --test-map-pgs-dump [--pool <poolid>] map all pgs" << std::endl;
35 cout << " --test-map-pgs-dump-all [--pool <poolid>] map all pgs to osds" << std::endl;
36 cout << " --health dump health checks" << std::endl;
// The two mark-* options are described as not persisted.
37 cout << " --mark-up-in mark osds up and in (but do not persist)" << std::endl;
38 cout << " --mark-out <osdid> mark an osd as out (but do not persist)" << std::endl;
39 cout << " --with-default-pool include default pool when creating map" << std::endl;
40 cout << " --clear-temp clear pg_temp and primary_temp" << std::endl;
41 cout << " --test-random do random placements" << std::endl;
42 cout << " --test-map-pg <pgid> map a pgid to osds" << std::endl;
43 cout << " --test-map-object <objectname> [--pool <poolid>] map an object to osds"
45 cout << " --upmap-cleanup <file> clean up pg_upmap[_items] entries, writing" << std::endl;
46 cout << " commands to <file> [default: - for stdout]" << std::endl;
// Upmap balancing options; the documented defaults are "-" (stdout) for
// <file>, 100 for --upmap-max and .01 for --upmap-deviation.
47 cout << " --upmap <file> calculate pg upmap entries to balance pg layout" << std::endl;
48 cout << " writing commands to <file> [default: - for stdout]" << std::endl;
49 cout << " --upmap-max <max-count> set max upmap entries to calculate [default: 100]" << std::endl;
50 cout << " --upmap-deviation <max-deviation>" << std::endl;
51 cout << " max deviation from target [default: .01]" << std::endl;
52 cout << " --upmap-pool <poolname> restrict upmap balancing to 1 or more pools" << std::endl;
// --upmap-save is documented without an argument.
53 cout << " --upmap-save write modified OSDMap with upmap changes" << std::endl;
// Render the pg_upmap changes held in pending_inc as "ceph osd ..." command
// lines and write the resulting text to the file descriptor fd.
//
// pending_inc: incremental whose old_pg_upmap, new_pg_upmap,
//              old_pg_upmap_items and new_pg_upmap_items are emitted, in that order.
// fd:          destination descriptor handed to safe_write().
57 void print_inc_upmaps(const OSDMap::Incremental& pending_inc, int fd)
// Removed whole-PG upmaps: one "rm-pg-upmap" command per entry.
60 for (auto& i : pending_inc.old_pg_upmap) {
61 ss << "ceph osd rm-pg-upmap " << i << std::endl;
// New whole-PG upmaps: the command starts with the key (i.first); the inner
// loop walks the mapped values in i.second.
63 for (auto& i : pending_inc.new_pg_upmap) {
64 ss << "ceph osd pg-upmap " << i.first;
65 for (auto osd : i.second) {
// Removed per-item upmaps: one "rm-pg-upmap-items" command per entry.
70 for (auto& i : pending_inc.old_pg_upmap_items) {
71 ss << "ceph osd rm-pg-upmap-items " << i << std::endl;
// New per-item upmaps: every pair in i.second is appended as " <first> <second>".
73 for (auto& i : pending_inc.new_pg_upmap_items) {
74 ss << "ceph osd pg-upmap-items " << i.first;
75 for (auto p : i.second) {
76 ss << " " << p.first << " " << p.second;
// Write the accumulated text with a single safe_write() call.
// NOTE(review): `ss` and `s` are declared on lines not visible here —
// presumably a string stream and a copy of its contents; confirm.
81 int r = safe_write(fd, s.c_str(), s.size());
// Error report for the write; the condition guarding it is not visible here.
83 cerr << "error writing output: " << cpp_strerror(r) << std::endl;
// Tool entry point. This first part copies argv into `args`, initialises the
// Ceph context and declares the option state (with defaults) that the
// argument-parsing loop fills in.
88 int main(int argc, const char **argv)
90 vector<const char*> args;
91 argv_to_vec(argc, argv, args);
// Context is set up as a client-type utility with no default config file.
94 auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
95 CODE_ENVIRONMENT_UTILITY,
96 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
97 common_init_finish(g_ceph_context);
// Program name, used as the prefix of most diagnostics below.
99 const char *me = argv[0];
// Structured-output formatters; they stay empty unless --dump / --tree are
// given a value other than "plain".
103 boost::scoped_ptr<Formatter> print_formatter;
105 boost::scoped_ptr<Formatter> tree_formatter;
106 bool createsimple = false;
107 bool createpool = false;
108 bool create_from_conf = false;
// PG bit counts default to the configured osd_pg_bits / osd_pgp_bits.
110 int pg_bits = g_conf->osd_pg_bits;
111 int pgp_bits = g_conf->osd_pgp_bits;
112 bool clobber = false;
113 bool modified = false;
114 std::string export_crush, import_crush, test_map_pg, test_map_object;
115 bool test_crush = false;
// -1 means "not set"; range mode requires both range bounds to be >= 0.
116 int range_first = -1;
119 bool mark_up_in = false;
121 bool clear_temp = false;
122 bool test_map_pgs = false;
123 bool test_map_pgs_dump = false;
124 bool test_random = false;
125 bool upmap_cleanup = false;
127 bool upmap_save = false;
// "-" selects stdout for the generated upmap commands.
129 std::string upmap_file = "-";
131 float upmap_deviation = .01;
132 std::set<std::string> upmap_pools;
134 bool test_map_pgs_dump_all = false;
// Collects parse errors reported by ceph_argparse_witharg().
137 std::ostringstream err;
// Command-line parsing: walk `args`, letting each ceph_argparse_* helper
// consume the option it recognises and record it in the option state above.
// The loop header has no increment expression; advancing `i` is left to the
// helpers and to lines not visible in this excerpt.
138 for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) {
139 if (ceph_argparse_double_dash(args, i)) {
141 } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
143 } else if (ceph_argparse_flag(args, i, "-p", "--print", (char*)NULL)) {
// --dump / --tree: a formatter is only created for a non-empty value other
// than "plain"; "json" is passed as the third argument to Formatter::create().
145 } else if (ceph_argparse_witharg(args, i, &val, err, "--dump", (char*)NULL)) {
147 if (!val.empty() && val != "plain") {
148 print_formatter.reset(Formatter::create(val, "", "json"));
150 } else if (ceph_argparse_witharg(args, i, &val, err, "--tree", (char*)NULL)) {
152 if (!val.empty() && val != "plain") {
153 tree_formatter.reset(Formatter::create(val, "", "json"));
155 } else if (ceph_argparse_witharg(args, i, &upmap_file, "--upmap-cleanup", (char*)NULL)) {
156 upmap_cleanup = true;
// --upmap-save is documented as a plain flag. Parsing it as an option with an
// argument would swallow the next command-line word (typically the map
// filename) and overwrite upmap_file with it.
157 } else if (ceph_argparse_flag(args, i, "--upmap-save", (char*)NULL)) {
// --upmap also enables the cleanup pass.
159 } else if (ceph_argparse_witharg(args, i, &upmap_file, "--upmap", (char*)NULL)) {
160 upmap_cleanup = true;
162 } else if (ceph_argparse_witharg(args, i, &upmap_max, err, "--upmap-max", (char*)NULL)) {
163 } else if (ceph_argparse_witharg(args, i, &upmap_deviation, err, "--upmap-deviation", (char*)NULL)) {
// --upmap-pool may be repeated; names accumulate in a set.
164 } else if (ceph_argparse_witharg(args, i, &val, "--upmap-pool", (char*)NULL)) {
165 upmap_pools.insert(val);
166 } else if (ceph_argparse_witharg(args, i, &num_osd, err, "--createsimple", (char*)NULL)) {
167 if (!err.str().empty()) {
168 cerr << err.str() << std::endl;
172 } else if (ceph_argparse_flag(args, i, "--health", (char*)NULL)) {
174 } else if (ceph_argparse_flag(args, i, "--with-default-pool", (char*)NULL)) {
176 } else if (ceph_argparse_flag(args, i, "--create-from-conf", (char*)NULL)) {
177 create_from_conf = true;
178 } else if (ceph_argparse_flag(args, i, "--mark-up-in", (char*)NULL)) {
// NOTE(review): std::stoi throws on non-numeric input and no handler is
// visible in this excerpt — confirm whether a bad <osdid> can terminate the tool.
180 } else if (ceph_argparse_witharg(args, i, &val, "--mark-out", (char*)NULL)) {
181 marked_out = std::stoi(val);
182 } else if (ceph_argparse_flag(args, i, "--clear-temp", (char*)NULL)) {
184 } else if (ceph_argparse_flag(args, i, "--test-map-pgs", (char*)NULL)) {
186 } else if (ceph_argparse_flag(args, i, "--test-map-pgs-dump", (char*)NULL)) {
187 test_map_pgs_dump = true;
188 } else if (ceph_argparse_flag(args, i, "--test-map-pgs-dump-all", (char*)NULL)) {
189 test_map_pgs_dump_all = true;
190 } else if (ceph_argparse_flag(args, i, "--test-random", (char*)NULL)) {
192 } else if (ceph_argparse_flag(args, i, "--clobber", (char*)NULL)) {
// Integer-valued options: a non-empty `err` after parsing is reported on stderr.
194 } else if (ceph_argparse_witharg(args, i, &pg_bits, err, "--pg_bits", (char*)NULL)) {
195 if (!err.str().empty()) {
196 cerr << err.str() << std::endl;
199 } else if (ceph_argparse_witharg(args, i, &pgp_bits, err, "--pgp_bits", (char*)NULL)) {
200 if (!err.str().empty()) {
201 cerr << err.str() << std::endl;
204 } else if (ceph_argparse_witharg(args, i, &val, "--export_crush", (char*)NULL)) {
206 } else if (ceph_argparse_witharg(args, i, &val, "--import_crush", (char*)NULL)) {
208 } else if (ceph_argparse_witharg(args, i, &val, "--test_map_pg", (char*)NULL)) {
210 } else if (ceph_argparse_witharg(args, i, &val, "--test_map_object", (char*)NULL)) {
211 test_map_object = val;
212 } else if (ceph_argparse_flag(args, i, "--test_crush", (char*)NULL)) {
// --pg_num is parsed as a base-10 integer; a non-empty `interr` signals failure.
214 } else if (ceph_argparse_witharg(args, i, &val, err, "--pg_num", (char*)NULL)) {
216 pg_num = strict_strtoll(val.c_str(), 10, &interr);
217 if (interr.length() > 0) {
218 cerr << "error parsing integer value " << interr << std::endl;
221 } else if (ceph_argparse_witharg(args, i, &range_first, err, "--range_first", (char*)NULL)) {
222 } else if (ceph_argparse_witharg(args, i, &range_last, err, "--range_last", (char*)NULL)) {
223 } else if (ceph_argparse_witharg(args, i, &pool, err, "--pool", (char*)NULL)) {
224 if (!err.str().empty()) {
225 cerr << err.str() << std::endl;
// After option parsing, exactly one positional argument (the osdmap
// filename) is expected; zero or more than one is reported as an error.
233 cerr << me << ": must specify osdmap filename" << std::endl;
236 else if (args.size() > 1) {
237 cerr << me << ": too many arguments" << std::endl;
// Range mode: active only when both --range_first and --range_last were
// given. One file is read per index in [range_first, range_last] and each
// map is passed to OSDMap::dedup() together with `prev`.
242 if (range_first >= 0 && range_last >= 0) {
245 for (int i=range_first; i <= range_last; i++) {
// NOTE(review): `f` is built on lines not visible here — presumably the
// per-index file name; confirm.
249 string error, s = f.str();
250 int r = bl.read_file(s.c_str(), &error);
252 cerr << "unable to read " << s << ": " << cpp_strerror(r) << std::endl;
255 cout << s << " got " << bl.length() << " bytes" << std::endl;
// NOTE(review): allocated with plain new; no matching release is visible in
// this excerpt — confirm ownership.
256 OSDMap *o = new OSDMap;
260 OSDMap::dedup(prev, o);
269 cerr << me << ": osdmap file '" << fn << "'" << std::endl;
// An existing map is read from fn unless a new map is being created or
// --clobber was given.
273 if (!createsimple && !create_from_conf && !clobber) {
275 r = bl.read_file(fn.c_str(), &error);
// Decode failures surface as buffer::error.
280 catch (const buffer::error &e) {
281 cerr << me << ": error decoding osdmap '" << fn << "'" << std::endl;
286 cerr << me << ": couldn't open " << fn << ": " << error << std::endl;
// Creating a map refuses to overwrite a file that already exists unless
// --clobber was given.
290 else if ((createsimple || create_from_conf) && !clobber && ::stat(fn.c_str(), &st) == 0) {
291 cerr << me << ": " << fn << " exists, --clobber to overwrite" << std::endl;
// Map creation (--createsimple / --create-from-conf): the fsid is zeroed and
// the map is built either with or without a pool.
// NOTE(review): the condition choosing between the two build_* calls is not
// visible here — presumably tied to --with-default-pool; confirm.
295 if (createsimple || create_from_conf) {
298 cerr << me << ": osd count must be > 0" << std::endl;
305 memset(&fsid, 0, sizeof(uuid_d));
307 osdmap.build_simple_with_pool(
308 g_ceph_context, 0, fsid, num_osd, pg_bits, pgp_bits);
310 osdmap.build_simple(g_ceph_context, 0, fsid, num_osd);
// --mark-up-in: every OSD id below max_osd gets the UP state bit, the IN
// weight and a crush item weight of 1.0.
316 cout << "marking all OSDs up and in" << std::endl;
317 int n = osdmap.get_max_osd();
318 for (int i=0; i<n; i++) {
319 osdmap.set_state(i, osdmap.get_state(i) | CEPH_OSD_UP);
320 osdmap.set_weight(i, CEPH_OSD_IN);
321 osdmap.crush->adjust_item_weightf(g_ceph_context, i, 1.0);
// --mark-out: only acted on when the id lies in [0, max_osd). The OSD keeps
// the UP bit and a crush weight of 1.0 but gets the OUT weight.
325 if (marked_out >=0 && marked_out < osdmap.get_max_osd()) {
326 cout << "marking OSD@" << marked_out << " as out" << std::endl;
// NOTE(review): `id` is declared on a line not visible here — presumably a
// copy of marked_out; confirm.
328 osdmap.set_state(id, osdmap.get_state(id) | CEPH_OSD_UP);
329 osdmap.set_weight(id, CEPH_OSD_OUT);
330 osdmap.crush->adjust_item_weightf(g_ceph_context, id, 1.0);
// --clear-temp announcement; the clearing calls themselves are not visible here.
334 cout << "clearing pg/primary temp" << std::endl;
// Upmap handling. Generated "ceph osd ..." commands go to stdout unless a
// file other than "-" was named on the command line.
337 int upmap_fd = STDOUT_FILENO;
338 if (upmap || upmap_cleanup) {
339 if (upmap_file != "-") {
// O_TRUNC: start from an empty file. Without it, rewriting an existing,
// longer file would leave commands from the earlier run after the new output.
340 upmap_fd = ::open(upmap_file.c_str(), O_CREAT|O_WRONLY|O_TRUNC, 0644);
342 cerr << "error opening " << upmap_file << ": " << cpp_strerror(errno)
346 cout << "writing upmap command output to: " << upmap_file << std::endl;
// Cleanup pass: clean_pg_upmaps() fills an incremental for the next epoch,
// which is printed as commands and then applied to the in-memory map.
350 cout << "checking for upmap cleanups" << std::endl;
351 OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
352 pending_inc.fsid = osdmap.get_fsid();
353 int r = osdmap.clean_pg_upmaps(g_ceph_context, &pending_inc);
355 print_inc_upmaps(pending_inc, upmap_fd);
356 r = osdmap.apply_incremental(pending_inc);
// Balancing pass: announces its limits, then builds a second incremental.
361 cout << "upmap, max-count " << upmap_max
362 << ", max deviation " << upmap_deviation
364 OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
365 pending_inc.fsid = osdmap.get_fsid();
// Each --upmap-pool name is looked up in the map; unknown names are reported.
367 for (auto& s : upmap_pools) {
368 int64_t p = osdmap.lookup_pg_pool_name(s);
370 cerr << " pool '" << s << "' does not exist" << std::endl;
376 cout << " limiting to pools " << upmap_pools << " (" << pools << ")"
378 int changed = osdmap.calc_pg_upmaps(
379 g_ceph_context, upmap_deviation,
383 print_inc_upmaps(pending_inc, upmap_fd);
385 int r = osdmap.apply_incremental(pending_inc);
390 cout << "no upmaps proposed" << std::endl;
// Only a real output file (not "-") needs handling once the commands are written.
393 if (upmap_file != "-") {
// --import-crush: read a crush map from the named file, check it against the
// osdmap and install it through an incremental for the next epoch.
397 if (!import_crush.empty()) {
400 r = cbl.read_file(import_crush.c_str(), &error);
402 cerr << me << ": error reading crush map from " << import_crush
403 << ": " << error << std::endl;
409 bufferlist::iterator p = cbl.begin();
// Reported when the crush map has more devices than the osdmap has OSD slots.
412 if (cw.get_max_devices() > osdmap.get_max_osd()) {
413 cerr << me << ": crushmap max_devices " << cw.get_max_devices()
414 << " > osdmap max_osd " << osdmap.get_max_osd() << std::endl;
419 OSDMap::Incremental inc;
420 inc.fsid = osdmap.get_fsid();
421 inc.epoch = osdmap.get_epoch()+1;
423 osdmap.apply_incremental(inc);
424 cout << me << ": imported " << cbl.length() << " byte crush map from " << import_crush << std::endl;
// --export-crush: encode the osdmap's crush map and write it to the named file.
428 if (!export_crush.empty()) {
430 osdmap.crush->encode(cbl, CEPH_FEATURES_SUPPORTED_DEFAULT);
431 r = cbl.write_file(export_crush.c_str());
// Name the file that was actually being written (export_crush); the import
// path is unrelated to this failure and is usually empty here.
433 cerr << me << ": error writing crush map to " << export_crush << std::endl;
436 cout << me << ": exported crush map to " << export_crush << std::endl;
// --test-map-object: map an object name to a PG in `pool` and look up the
// acting OSDs for that PG.
439 if (!test_map_object.empty()) {
440 object_t oid(test_map_object);
// Notice about the fallback pool; the condition guarding it is not visible here.
442 cout << me << ": assuming pool 1 (use --pool to override)" << std::endl;
445 if (!osdmap.have_pg_pool(pool)) {
446 cerr << "There is no pool " << pool << std::endl;
// object + locator -> raw pg -> pg, then pg -> acting set.
449 object_locator_t loc(pool);
450 pg_t raw_pgid = osdmap.object_locator_to_pg(oid, loc);
451 pg_t pgid = osdmap.raw_pg_to_pg(raw_pgid);
454 osdmap.pg_to_acting_osds(pgid, acting);
455 cout << " object '" << oid
460 if (!test_map_pg.empty()) {
// --test-map-pg: parse the textual pgid, then print its raw, up and acting
// OSD sets together with the primary of each.
462 if (!pgid.parse(test_map_pg.c_str())) {
// Close the quote opened before the pgid so the message is well-formed.
463 cerr << me << ": failed to parse pg '" << test_map_pg << "'" << std::endl;
466 cout << " parsed '" << test_map_pg << "' -> " << pgid << std::endl;
468 vector<int> raw, up, acting;
469 int raw_primary, up_primary, acting_primary;
470 osdmap.pg_to_raw_osds(pgid, &raw, &raw_primary);
471 osdmap.pg_to_up_acting_osds(pgid, &up, &up_primary,
472 &acting, &acting_primary);
473 cout << pgid << " raw (" << raw << ", p" << raw_primary
474 << ") up (" << up << ", p" << up_primary
475 << ") acting (" << acting << ", p" << acting_primary << ")"
// --test-map-pgs / --test-map-pgs-dump / --test-map-pgs-dump-all: map every
// PG of the selected pool(s), optionally print each mapping, and collect
// per-OSD placement statistics.
478 if (test_map_pgs || test_map_pgs_dump || test_map_pgs_dump_all) {
// pool == -1 means "no --pool restriction".
479 if (pool != -1 && !osdmap.have_pg_pool(pool)) {
480 cerr << "There is no pool " << pool << std::endl;
// Per-OSD tallies, indexed by OSD id, plus a 30-bucket `size` table of which
// only the first four entries are printed at the end.
483 int n = osdmap.get_max_osd();
484 vector<int> count(n, 0);
485 vector<int> first_count(n, 0);
486 vector<int> primary_count(n, 0);
487 vector<int> size(30, 0);
490 auto& pools = osdmap.get_pools();
491 for (auto p = pools.begin(); p != pools.end(); ++p) {
492 if (pool != -1 && p->first != pool)
495 p->second.set_pg_num(pg_num);
497 cout << "pool " << p->first
498 << " pg_num " << p->second.get_pg_num() << std::endl;
// One iteration per PG of the pool.
499 for (unsigned i = 0; i < p->second.get_pg_num(); ++i) {
500 pg_t pgid = pg_t(i, p->first);
502 vector<int> osds, raw, up, acting;
503 int primary, calced_primary, up_primary, acting_primary;
// First branch (its condition is not visible here): fill the OSD list with
// rand() values reduced modulo max_osd instead of consulting the map.
505 osds.resize(p->second.size);
506 for (unsigned i=0; i<osds.size(); ++i) {
507 osds[i] = rand() % osdmap.get_max_osd();
510 } else if (test_map_pgs_dump_all) {
511 osdmap.pg_to_raw_osds(pgid, &raw, &calced_primary);
512 osdmap.pg_to_up_acting_osds(pgid, &up, &up_primary,
513 &acting, &acting_primary);
515 osdmap.pg_to_acting_osds(pgid, &osds, &primary);
// Optional per-PG output for the two dump modes.
519 if (test_map_pgs_dump) {
520 cout << pgid << "\t" << osds << "\t" << primary << std::endl;
521 } else if (test_map_pgs_dump_all) {
522 cout << pgid << " raw (" << raw << ", p" << calced_primary
523 << ") up (" << up << ", p" << up_primary
524 << ") acting (" << acting << ", p" << acting_primary << ")"
528 for (unsigned i=0; i<osds.size(); i++) {
529 //cout << " rep " << i << " on " << osds[i] << std::endl;
// Tally the first OSD of the set and the primary.
533 first_count[osds[0]]++;
535 primary_count[primary]++;
// Per-OSD table (tab-separated; the header names the columns).
543 cout << "#osd\tcount\tfirst\tprimary\tc wt\twt\n";
544 for (int i=0; i<n; i++) {
545 if (!osdmap.is_in(i))
547 if (osdmap.crush->get_item_weight(i) <= 0)
552 << "\t" << first_count[i]
553 << "\t" << primary_count[i]
554 << "\t" << osdmap.crush->get_item_weightf(i)
555 << "\t" << osdmap.get_weightf(i)
560 count[i] < count[min_osd]))
564 count[i] > count[max_osd]))
// Average is total / in, guarded against in == 0.
568 uint64_t avg = in ? (total / in) : 0;
// Accumulate the squared difference from the average per OSD.
570 for (int i=0; i<n; i++) {
571 if (!osdmap.is_in(i))
573 if (osdmap.crush->get_item_weight(i) <= 0)
575 dev += (avg - count[i]) * (avg - count[i]);
580 //double edev = sqrt(pgavg) * (double)avg / pgavg;
// NOTE(review): divides by `in` with no guard visible in this excerpt, unlike
// the avg computation above — confirm in > 0 is guaranteed here.
581 double edev = sqrt((double)total / (double)in * (1.0 - (1.0 / (double)in)));
582 cout << " in " << in << std::endl;
583 cout << " avg " << avg
585 << " (" << (dev/avg) << "x)"
586 << " (expected " << edev << " " << (edev/avg) << "x))"
590 cout << " min osd." << min_osd << " " << count[min_osd] << std::endl;
592 cout << " max osd." << max_osd << " " << count[max_osd] << std::endl;
// Only entries 0..3 of the `size` table are reported.
594 for (int i=0; i<4; i++) {
595 cout << "size " << i << "\t" << size[i] << std::endl;
// Repeated-mapping pass: for every PG of every pool, compute the acting set
// up to 100 times and compare it with the set remembered in `m`.
// NOTE(review): the enclosing condition and outer loop are not visible here —
// presumably the --test_crush mode; confirm.
601 cout << "pass " << ++pass << std::endl;
// Remembered result per PG.
603 ceph::unordered_map<pg_t,vector<int> > m;
604 for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.get_pools().begin();
605 p != osdmap.get_pools().end();
607 const pg_pool_t *pool = osdmap.get_pg_pool(p->first);
608 for (ps_t ps = 0; ps < pool->get_pg_num(); ps++) {
609 pg_t pgid(ps, p->first, -1);
610 for (int i=0; i<100; i++) {
611 cout << pgid << " attempt " << i << std::endl;
614 osdmap.pg_to_acting_osds(pgid, r);
615 //cout << pgid << " " << r << std::endl;
// Prints the remembered mapping next to the new one; the condition guarding
// this line is not visible here.
618 cout << pgid << " had " << m[pgid] << " now " << r << std::endl;
// Nothing to do: none of the output, modification, crush, test or upmap
// actions was requested.
629 if (!print && !health && !tree && !modified &&
630 export_crush.empty() && import_crush.empty() &&
631 test_map_pg.empty() && test_map_object.empty() &&
632 !test_map_pgs && !test_map_pgs_dump && !test_map_pgs_dump_all &&
633 !upmap && !upmap_cleanup) {
634 cerr << me << ": no action specified?" << std::endl;
// Health output: the map's health checks are collected and dumped through a
// JSONFormatter under the name "checks".
642 health_check_map_t checks;
643 osdmap.check_health(&checks);
644 JSONFormatter jf(true);
645 jf.dump_object("checks", checks);
// Print output: when a formatter was requested, the map is dumped inside an
// "osdmap" section and flushed to stdout.
649 if (print_formatter) {
650 print_formatter->open_object_section("osdmap");
651 osdmap.dump(print_formatter.get());
652 print_formatter->close_section();
653 print_formatter->flush(cout);
// Tree output: formatter-based when one was requested, otherwise printed
// straight to cout.
660 if (tree_formatter) {
661 tree_formatter->open_object_section("tree");
662 osdmap.print_tree(tree_formatter.get(), NULL);
663 tree_formatter->close_section();
664 tree_formatter->flush(cout);
667 osdmap.print_tree(NULL, &cout);
// Write-back: encode the map and write it to fn, reporting any write error.
// The condition that enables this step is not visible here.
672 osdmap.encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT | CEPH_FEATURE_RESERVED);
675 cout << me << ": writing epoch " << osdmap.get_epoch()
678 int r = bl.write_file(fn.c_str());
680 cerr << "osdmaptool: error writing to '" << fn << "': "
681 << cpp_strerror(r) << std::endl;