1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2013 Inktank <info@inktank.com>
8 * LGPL2.1 (see COPYING-LGPL2.1) or later
13 #include <gtest/gtest.h>
15 #include "include/stringify.h"
17 #include "crush/CrushWrapper.h"
18 #include "osd/osd_types.h"
22 std::unique_ptr<CrushWrapper> build_indep_map(CephContext *cct, int num_rack,
23 int num_host, int num_osd)
25 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
28 c->set_type_name(5, "root");
29 c->set_type_name(4, "row");
30 c->set_type_name(3, "rack");
31 c->set_type_name(2, "chasis");
32 c->set_type_name(1, "host");
33 c->set_type_name(0, "osd");
36 c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
37 5, 0, NULL, NULL, &rootno);
38 c->set_item_name(rootno, "default");
40 map<string,string> loc;
41 loc["root"] = "default";
44 for (int r=0; r<num_rack; ++r) {
45 loc["rack"] = string("rack-") + stringify(r);
46 for (int h=0; h<num_host; ++h) {
47 loc["host"] = string("host-") + stringify(r) + string("-") + stringify(h);
48 for (int o=0; o<num_osd; ++o, ++osd) {
49 c->insert_item(cct, osd, 1.0, string("osd.") + stringify(osd), loc);
55 ret = c->add_rule(ruleno, 4, 123, 1, 20);
56 assert(ret == ruleno);
57 ret = c->set_rule_step(ruleno, 0, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 10, 0);
59 ret = c->set_rule_step(ruleno, 1, CRUSH_RULE_TAKE, rootno, 0);
61 ret = c->set_rule_step(ruleno, 2, CRUSH_RULE_CHOOSELEAF_INDEP, CRUSH_CHOOSE_N, 1);
63 ret = c->set_rule_step(ruleno, 3, CRUSH_RULE_EMIT, 0, 0);
65 c->set_rule_name(ruleno, "data");
70 Formatter *f = Formatter::create("json-pretty");
71 f->open_object_section("crush_map");
81 int get_num_dups(const vector<int>& v)
85 for (unsigned i=0; i<v.size(); ++i) {
88 else if (v[i] != CRUSH_ITEM_NONE)
94 TEST(CRUSH, indep_toosmall) {
95 std::unique_ptr<CrushWrapper> c(build_indep_map(g_ceph_context, 1, 3, 1));
96 vector<__u32> weight(c->get_max_devices(), 0x10000);
97 c->dump_tree(&cout, NULL);
99 for (int x = 0; x < 100; ++x) {
101 c->do_rule(0, x, out, 5, weight, 0);
102 cout << x << " -> " << out << std::endl;
104 for (unsigned i=0; i<out.size(); ++i) {
105 if (out[i] == CRUSH_ITEM_NONE)
108 ASSERT_EQ(2, num_none);
109 ASSERT_EQ(0, get_num_dups(out));
113 TEST(CRUSH, indep_basic) {
114 std::unique_ptr<CrushWrapper> c(build_indep_map(g_ceph_context, 3, 3, 3));
115 vector<__u32> weight(c->get_max_devices(), 0x10000);
116 c->dump_tree(&cout, NULL);
118 for (int x = 0; x < 100; ++x) {
120 c->do_rule(0, x, out, 5, weight, 0);
121 cout << x << " -> " << out << std::endl;
123 for (unsigned i=0; i<out.size(); ++i) {
124 if (out[i] == CRUSH_ITEM_NONE)
127 ASSERT_EQ(0, num_none);
128 ASSERT_EQ(0, get_num_dups(out));
132 TEST(CRUSH, indep_out_alt) {
133 std::unique_ptr<CrushWrapper> c(build_indep_map(g_ceph_context, 3, 3, 3));
134 vector<__u32> weight(c->get_max_devices(), 0x10000);
136 // mark a bunch of osds out
138 for (int i=0; i<num / 2; ++i)
140 c->dump_tree(&cout, NULL);
142 // need more retries to get 9/9 hosts for x in 0..99
143 c->set_choose_total_tries(100);
144 for (int x = 0; x < 100; ++x) {
146 c->do_rule(0, x, out, 9, weight, 0);
147 cout << x << " -> " << out << std::endl;
149 for (unsigned i=0; i<out.size(); ++i) {
150 if (out[i] == CRUSH_ITEM_NONE)
153 ASSERT_EQ(0, num_none);
154 ASSERT_EQ(0, get_num_dups(out));
158 TEST(CRUSH, indep_out_contig) {
159 std::unique_ptr<CrushWrapper> c(build_indep_map(g_ceph_context, 3, 3, 3));
160 vector<__u32> weight(c->get_max_devices(), 0x10000);
162 // mark a bunch of osds out
164 for (int i=0; i<num / 3; ++i)
166 c->dump_tree(&cout, NULL);
168 c->set_choose_total_tries(100);
169 for (int x = 0; x < 100; ++x) {
171 c->do_rule(0, x, out, 7, weight, 0);
172 cout << x << " -> " << out << std::endl;
174 for (unsigned i=0; i<out.size(); ++i) {
175 if (out[i] == CRUSH_ITEM_NONE)
178 ASSERT_EQ(1, num_none);
179 ASSERT_EQ(0, get_num_dups(out));
184 TEST(CRUSH, indep_out_progressive) {
185 std::unique_ptr<CrushWrapper> c(build_indep_map(g_ceph_context, 3, 3, 3));
186 c->set_choose_total_tries(100);
187 vector<__u32> tweight(c->get_max_devices(), 0x10000);
188 c->dump_tree(&cout, NULL);
191 for (int x = 1; x < 5; ++x) {
192 vector<__u32> weight(c->get_max_devices(), 0x10000);
194 std::map<int,unsigned> pos;
196 for (unsigned i=0; i<weight.size(); ++i) {
198 c->do_rule(0, x, out, 7, weight, 0);
199 cout << "(" << i << "/" << weight.size() << " out) "
200 << x << " -> " << out << std::endl;
202 for (unsigned k=0; k<out.size(); ++k) {
203 if (out[k] == CRUSH_ITEM_NONE)
206 ASSERT_EQ(0, get_num_dups(out));
208 // make sure nothing moved
211 for (unsigned j=0; j<out.size(); ++j) {
212 if (i && out[j] != prev[j]) {
216 if (out[j] == CRUSH_ITEM_NONE) {
219 if (i && pos.count(out[j])) {
220 // result shouldn't have moved position
221 if (j != pos[out[j]]) {
222 cout << " " << out[j] << " moved from " << pos[out[j]] << " to " << j << std::endl;
225 //ASSERT_EQ(j, pos[out[j]]);
228 if (moved || changed)
229 cout << " " << moved << " moved, " << changed << " changed" << std::endl;
231 ASSERT_LE(changed, 3);
233 // mark another osd out
237 for (unsigned j=0; j<out.size(); ++j) {
238 if (out[j] != CRUSH_ITEM_NONE)
243 cout << tchanged << " total changed" << std::endl;
247 TEST(CRUSH, straw_zero) {
248 // zero weight items should have no effect on placement.
250 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
251 const int ROOT_TYPE = 1;
252 c->set_type_name(ROOT_TYPE, "root");
253 const int OSD_TYPE = 0;
254 c->set_type_name(OSD_TYPE, "osd");
257 int items[n], weights[n];
258 for (int i=0; i <n; ++i) {
260 weights[i] = 0x10000 * (n-i-1);
263 c->set_max_devices(n);
265 string root_name0("root0");
267 EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
268 ROOT_TYPE, n, items, weights, &root0));
269 EXPECT_EQ(0, c->set_item_name(root0, root_name0));
271 string name0("rule0");
272 int rule0 = c->add_simple_rule(name0, root_name0, "osd", "",
273 "firstn", pg_pool_t::TYPE_REPLICATED);
276 string root_name1("root1");
278 EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
279 ROOT_TYPE, n-1, items, weights, &root1));
280 EXPECT_EQ(0, c->set_item_name(root1, root_name1));
282 string name1("rule1");
283 int rule1 = c->add_simple_rule(name1, root_name1, "osd", "",
284 "firstn", pg_pool_t::TYPE_REPLICATED);
289 vector<unsigned> reweight(n, 0x10000);
290 for (int i=0; i<10000; ++i) {
291 vector<int> out0, out1;
292 c->do_rule(rule0, i, out0, 1, reweight, 0);
293 ASSERT_EQ(1u, out0.size());
294 c->do_rule(rule1, i, out1, 1, reweight, 0);
295 ASSERT_EQ(1u, out1.size());
296 ASSERT_EQ(out0[0], out1[0]);
297 //cout << i << "\t" << out0 << "\t" << out1 << std::endl;
301 TEST(CRUSH, straw_same) {
302 // items with the same weight should map about the same as items
303 // with very similar weights.
305 // give the 0 vector a paired stair pattern, with dup weights. note
306 // that the original straw flaw does not appear when there are 2 of
307 // the initial weight, but it does when there is just 1.
309 // give the 1 vector a similar stair pattern, but make the same
310 // steps weights slightly different (no dups). this works.
312 // compare the result and verify that the resulting mapping is
315 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
316 const int ROOT_TYPE = 1;
317 c->set_type_name(ROOT_TYPE, "root");
318 const int OSD_TYPE = 0;
319 c->set_type_name(OSD_TYPE, "osd");
322 int items[n], weights[n];
323 for (int i=0; i <n; ++i) {
325 weights[i] = 0x10000 * ((i+1)/2 + 1);
328 c->set_max_devices(n);
330 string root_name0("root0");
332 EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
333 ROOT_TYPE, n, items, weights, &root0));
334 EXPECT_EQ(0, c->set_item_name(root0, root_name0));
336 string name0("rule0");
337 int rule0 = c->add_simple_rule(name0, root_name0, "osd", "",
338 "firstn", pg_pool_t::TYPE_REPLICATED);
341 for (int i=0; i <n; ++i) {
343 weights[i] = 0x10000 * ((i+1)/2 + 1) + (i%2)*100;
346 string root_name1("root1");
348 EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1,
349 ROOT_TYPE, n, items, weights, &root1));
350 EXPECT_EQ(0, c->set_item_name(root1, root_name1));
352 string name1("rule1");
353 int rule1 = c->add_simple_rule(name1, root_name1, "osd", "",
354 "firstn", pg_pool_t::TYPE_REPLICATED);
358 crush_bucket_straw *sb0 = reinterpret_cast<crush_bucket_straw*>(c->get_crush_map()->buckets[-1-root0]);
359 crush_bucket_straw *sb1 = reinterpret_cast<crush_bucket_straw*>(c->get_crush_map()->buckets[-1-root1]);
361 for (int i=0; i<n; ++i) {
363 << "\t" << sb0->item_weights[i]
364 << "\t" << sb1->item_weights[i]
366 << "\t" << sb0->straws[i]
367 << "\t" << sb1->straws[i]
373 JSONFormatter jf(true);
374 jf.open_object_section("crush");
382 vector<int> sum0(n, 0), sum1(n, 0);
383 vector<unsigned> reweight(n, 0x10000);
386 for (int i=0; i<max; ++i) {
387 vector<int> out0, out1;
388 c->do_rule(rule0, i, out0, 1, reweight, 0);
389 ASSERT_EQ(1u, out0.size());
390 c->do_rule(rule1, i, out1, 1, reweight, 0);
391 ASSERT_EQ(1u, out1.size());
394 if (out0[0] != out1[0])
397 for (int i=0; i<n; ++i) {
399 << "\t" << ((double)weights[i] / (double)weights[0])
400 << "\t" << sum0[i] << "\t" << ((double)sum0[i]/(double)sum0[0])
401 << "\t" << sum1[i] << "\t" << ((double)sum1[i]/(double)sum1[0])
404 double ratio = ((double)different / (double)max);
405 cout << different << " of " << max << " = "
407 << " different" << std::endl;
408 ASSERT_LT(ratio, .001);
411 double calc_straw2_stddev(int *weights, int n, bool verbose)
413 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
414 const int ROOT_TYPE = 2;
415 c->set_type_name(ROOT_TYPE, "root");
416 const int HOST_TYPE = 1;
417 c->set_type_name(HOST_TYPE, "host");
418 const int OSD_TYPE = 0;
419 c->set_type_name(OSD_TYPE, "osd");
422 for (int i=0; i <n; ++i) {
426 c->set_max_devices(n);
428 string root_name0("root0");
430 crush_bucket *b0 = crush_make_bucket(c->get_crush_map(),
431 CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
432 ROOT_TYPE, n, items, weights);
433 crush_add_bucket(c->get_crush_map(), 0, b0, &root0);
434 c->set_item_name(root0, root_name0);
436 string name0("rule0");
437 int rule0 = c->add_simple_rule(name0, root_name0, "osd", "",
438 "firstn", pg_pool_t::TYPE_REPLICATED);
441 double totalweight = 0;
442 vector<unsigned> reweight(n);
443 for (int i=0; i<n; ++i) {
445 reweight[i] = 0x10000;
446 totalweight += weights[i];
448 totalweight /= (double)0x10000;
449 double avgweight = totalweight / n;
454 for (int i=0; i<total; ++i) {
456 c->do_rule(rule0, i, out, 1, reweight, 0);
460 double expected = (double)total / (double)n;
462 cout << "expect\t\t\t" << expected << std::endl;
466 cout << "osd\tweight\tcount\tadjusted\n";
467 std::streamsize p = cout.precision();
468 cout << std::setprecision(4);
469 for (int i=0; i<n; ++i) {
470 double w = (double)weights[i] / (double)0x10000;
471 double adj = (double)sum[i] * avgweight / w;
472 stddev += (adj - expected) * (adj - expected);
481 cout << std::setprecision(p);
483 stddev = sqrt(stddev / (double)n);
485 cout << "std dev " << stddev << std::endl;
487 double p = 1.0 / (double)n;
488 double estddev = sqrt(exptotal * p * (1.0 - p));
490 cout << " vs " << estddev << "\t(expected)" << std::endl;
495 TEST(CRUSH, straw2_stddev)
499 cout << "maxskew\tstddev\n";
500 for (double step = 1.0; step < 2; step += .25) {
502 for (int i = 0; i < n; ++i) {
506 double stddev = calc_straw2_stddev(weights, n, true);
507 cout << ((double)weights[n-1]/(double)weights[0])
508 << "\t" << stddev << std::endl;
512 TEST(CRUSH, straw2_reweight) {
513 // when we adjust the weight of an item in a straw2 bucket,
514 // we should *only* see movement from or to that item, never
515 // between other items.
537 std::unique_ptr<CrushWrapper> c(new CrushWrapper);
538 const int ROOT_TYPE = 2;
539 c->set_type_name(ROOT_TYPE, "root");
540 const int HOST_TYPE = 1;
541 c->set_type_name(HOST_TYPE, "host");
542 const int OSD_TYPE = 0;
543 c->set_type_name(OSD_TYPE, "osd");
546 for (int i=0; i <n; ++i) {
548 //weights[i] = 0x10000;
551 c->set_max_devices(n);
553 string root_name0("root0");
555 crush_bucket *b0 = crush_make_bucket(c->get_crush_map(),
556 CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
557 ROOT_TYPE, n, items, weights);
558 EXPECT_EQ(0, crush_add_bucket(c->get_crush_map(), 0, b0, &root0));
559 EXPECT_EQ(0, c->set_item_name(root0, root_name0));
561 string name0("rule0");
562 int rule0 = c->add_simple_rule(name0, root_name0, "osd", "",
563 "firstn", pg_pool_t::TYPE_REPLICATED);
567 weights[changed] = weights[changed] / 10 * (rand() % 10);
569 string root_name1("root1");
571 crush_bucket *b1 = crush_make_bucket(c->get_crush_map(),
572 CRUSH_BUCKET_STRAW2, CRUSH_HASH_RJENKINS1,
573 ROOT_TYPE, n, items, weights);
574 EXPECT_EQ(0, crush_add_bucket(c->get_crush_map(), 0, b1, &root1));
575 EXPECT_EQ(0, c->set_item_name(root1, root_name1));
577 string name1("rule1");
578 int rule1 = c->add_simple_rule(name1, root_name1, "osd", "",
579 "firstn", pg_pool_t::TYPE_REPLICATED);
583 double totalweight = 0;
584 vector<unsigned> reweight(n);
585 for (int i=0; i<n; ++i) {
587 reweight[i] = 0x10000;
588 totalweight += weights[i];
590 totalweight /= (double)0x10000;
591 double avgweight = totalweight / n;
596 for (int i=0; i<total; ++i) {
597 vector<int> out0, out1;
598 c->do_rule(rule0, i, out0, 1, reweight, 0);
599 ASSERT_EQ(1u, out0.size());
601 c->do_rule(rule1, i, out1, 1, reweight, 0);
602 ASSERT_EQ(1u, out1.size());
607 if (out1[0] == changed) {
608 ASSERT_EQ(changed, out0[0]);
609 } else if (out0[0] != changed) {
610 ASSERT_EQ(out0[0], out1[0]);
614 double expected = (double)total / (double)n;
615 cout << "expect\t\t\t" << expected << std::endl;
617 cout << "osd\tweight\tcount\tadjusted\n";
618 std::streamsize p = cout.precision();
619 cout << std::setprecision(4);
620 for (int i=0; i<n; ++i) {
621 double w = (double)weights[i] / (double)0x10000;
622 double adj = (double)sum[i] * avgweight / w;
623 stddev += (adj - expected) * (adj - expected);
630 cout << std::setprecision(p);
632 stddev = sqrt(stddev / (double)n);
633 cout << "std dev " << stddev << std::endl;
635 double p = 1.0 / (double)n;
636 double estddev = sqrt((double)total * p * (1.0 - p));
637 cout << " vs " << estddev << std::endl;