8 #include "crush/crush.h"
11 #define dprintk(args...) /* printf(args) */
13 #define BUG_ON(x) assert(!(x))
15 struct crush_map *crush_create()
18 m = malloc(sizeof(*m));
21 memset(m, 0, sizeof(*m));
23 set_optimal_crush_map(m);
28 * finalize should be called _after_ all buckets are added to the map.
30 void crush_finalize(struct crush_map *map)
35 /* Calculate the needed working space while we do other
36 finalization tasks. */
37 map->working_size = sizeof(struct crush_work);
38 /* Space for the array of pointers to per-bucket workspace */
39 map->working_size += map->max_buckets *
40 sizeof(struct crush_work_bucket *);
42 /* calc max_devices */
44 for (b=0; b<map->max_buckets; b++) {
45 if (map->buckets[b] == 0)
47 for (i=0; i<map->buckets[b]->size; i++)
48 if (map->buckets[b]->items[i] >= map->max_devices)
49 map->max_devices = map->buckets[b]->items[i] + 1;
51 switch (map->buckets[b]->alg) {
53 /* The base case, permutation variables and
54 the pointer to the permutation array. */
55 map->working_size += sizeof(struct crush_work_bucket);
58 /* Every bucket has a permutation array. */
59 map->working_size += map->buckets[b]->size * sizeof(__u32);
67 int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno)
72 for (r=0; r < map->max_rules; r++)
73 if (map->rules[r] == 0)
75 assert(r < CRUSH_MAX_RULES);
80 if (r >= map->max_rules) {
83 void *_realloc = NULL;
84 if (map->max_rules +1 > CRUSH_MAX_RULES)
86 oldsize = map->max_rules;
88 if ((_realloc = realloc(map->rules, map->max_rules * sizeof(map->rules[0]))) == NULL) {
91 map->rules = _realloc;
93 memset(map->rules + oldsize, 0, (map->max_rules-oldsize) * sizeof(map->rules[0]));
101 struct crush_rule *crush_make_rule(int len, int ruleset, int type, int minsize, int maxsize)
103 struct crush_rule *rule;
104 rule = malloc(crush_rule_size(len));
108 rule->mask.ruleset = ruleset;
109 rule->mask.type = type;
110 rule->mask.min_size = minsize;
111 rule->mask.max_size = maxsize;
116 * be careful; this doesn't verify that the buffer you allocated is big enough!
118 void crush_rule_set_step(struct crush_rule *rule, int n, int op, int arg1, int arg2)
120 assert((__u32)n < rule->len);
121 rule->steps[n].op = op;
122 rule->steps[n].arg1 = arg1;
123 rule->steps[n].arg2 = arg2;
128 int crush_get_next_bucket_id(struct crush_map *map)
131 for (pos=0; pos < map->max_buckets; pos++)
132 if (map->buckets[pos] == 0)
138 int crush_add_bucket(struct crush_map *map,
140 struct crush_bucket *bucket,
145 /* find a bucket id */
147 id = crush_get_next_bucket_id(map);
150 while (pos >= map->max_buckets) {
152 int oldsize = map->max_buckets;
153 if (map->max_buckets)
154 map->max_buckets *= 2;
156 map->max_buckets = 8;
157 void *_realloc = NULL;
158 if ((_realloc = realloc(map->buckets, map->max_buckets * sizeof(map->buckets[0]))) == NULL) {
161 map->buckets = _realloc;
163 memset(map->buckets + oldsize, 0, (map->max_buckets-oldsize) * sizeof(map->buckets[0]));
166 if (map->buckets[pos] != 0) {
172 map->buckets[pos] = bucket;
174 if (idout) *idout = id;
178 int crush_remove_bucket(struct crush_map *map, struct crush_bucket *bucket)
180 int pos = -1 - bucket->id;
181 assert(pos < map->max_buckets);
182 map->buckets[pos] = NULL;
183 crush_destroy_bucket(bucket);
190 struct crush_bucket_uniform *
191 crush_make_uniform_bucket(int hash, int type, int size,
196 struct crush_bucket_uniform *bucket;
198 bucket = malloc(sizeof(*bucket));
201 memset(bucket, 0, sizeof(*bucket));
202 bucket->h.alg = CRUSH_BUCKET_UNIFORM;
203 bucket->h.hash = hash;
204 bucket->h.type = type;
205 bucket->h.size = size;
207 if (crush_multiplication_is_unsafe(size, item_weight))
210 bucket->h.weight = size * item_weight;
211 bucket->item_weight = item_weight;
212 bucket->h.items = malloc(sizeof(__s32)*size);
214 if (!bucket->h.items)
217 for (i=0; i<size; i++)
218 bucket->h.items[i] = items[i];
222 free(bucket->h.items);
230 struct crush_bucket_list*
231 crush_make_list_bucket(int hash, int type, int size,
237 struct crush_bucket_list *bucket;
239 bucket = malloc(sizeof(*bucket));
242 memset(bucket, 0, sizeof(*bucket));
243 bucket->h.alg = CRUSH_BUCKET_LIST;
244 bucket->h.hash = hash;
245 bucket->h.type = type;
246 bucket->h.size = size;
248 bucket->h.items = malloc(sizeof(__s32)*size);
249 if (!bucket->h.items)
253 bucket->item_weights = malloc(sizeof(__u32)*size);
254 if (!bucket->item_weights)
256 bucket->sum_weights = malloc(sizeof(__u32)*size);
257 if (!bucket->sum_weights)
260 for (i=0; i<size; i++) {
261 bucket->h.items[i] = items[i];
262 bucket->item_weights[i] = weights[i];
264 if (crush_addition_is_unsafe(w, weights[i]))
268 bucket->sum_weights[i] = w;
269 /*dprintk("pos %d item %d weight %d sum %d\n",
270 i, items[i], weights[i], bucket->sum_weights[i]);*/
273 bucket->h.weight = w;
277 free(bucket->sum_weights);
278 free(bucket->item_weights);
279 free(bucket->h.items);
287 static int height(int n) {
289 while ((n & 1) == 0) {
295 static int on_right(int n, int h) {
296 return n & (1 << (h+1));
298 static int parent(int n)
307 static int calc_depth(int size)
322 struct crush_bucket_tree*
323 crush_make_tree_bucket(int hash, int type, int size,
324 int *items, /* in leaf order */
327 struct crush_bucket_tree *bucket;
332 bucket = malloc(sizeof(*bucket));
335 memset(bucket, 0, sizeof(*bucket));
336 bucket->h.alg = CRUSH_BUCKET_TREE;
337 bucket->h.hash = hash;
338 bucket->h.type = type;
339 bucket->h.size = size;
342 bucket->h.items = NULL;
343 bucket->h.weight = 0;
344 bucket->node_weights = NULL;
345 bucket->num_nodes = 0;
346 /* printf("size 0 depth 0 nodes 0\n"); */
350 bucket->h.items = malloc(sizeof(__s32)*size);
351 if (!bucket->h.items)
354 /* calc tree depth */
355 depth = calc_depth(size);
356 bucket->num_nodes = 1 << depth;
357 dprintk("size %d depth %d nodes %d\n", size, depth, bucket->num_nodes);
359 bucket->node_weights = malloc(sizeof(__u32)*bucket->num_nodes);
360 if (!bucket->node_weights)
363 memset(bucket->h.items, 0, sizeof(__s32)*bucket->h.size);
364 memset(bucket->node_weights, 0, sizeof(__u32)*bucket->num_nodes);
366 for (i=0; i<size; i++) {
367 bucket->h.items[i] = items[i];
368 node = crush_calc_tree_node(i);
369 dprintk("item %d node %d weight %d\n", i, node, weights[i]);
370 bucket->node_weights[node] = weights[i];
372 if (crush_addition_is_unsafe(bucket->h.weight, weights[i]))
375 bucket->h.weight += weights[i];
376 for (j=1; j<depth; j++) {
379 if (crush_addition_is_unsafe(bucket->node_weights[node], weights[i]))
382 bucket->node_weights[node] += weights[i];
383 dprintk(" node %d weight %d\n", node, bucket->node_weights[node]);
386 BUG_ON(bucket->node_weights[bucket->num_nodes/2] != bucket->h.weight);
390 free(bucket->node_weights);
391 free(bucket->h.items);
401 * this code was written 8 years ago. i have a vague recollection of
402 * drawing boxes underneath bars of different lengths, where the bar
403 * length represented the probability/weight, and that there was some
404 * trial and error involved in arriving at this implementation.
405 * however, reading the code now after all this time, the intuition
406 * that motivated is lost on me. lame. my only excuse is that I now
407 * know that the approach is fundamentally flawed and am not
408 * particularly motivated to reconstruct the flawed reasoning.
410 * as best as i can remember, the idea is: sort the weights, and start
411 * with the smallest. arbitrarily scale it at 1.0 (16-bit fixed
412 * point). look at the next larger weight, and calculate the scaling
413 * factor for that straw based on the relative difference in weight so
414 * far. what's not clear to me now is why we are looking at wnext
415 * (the delta to the next bigger weight) for all remaining weights,
416 * and slicing things horizontally instead of considering just the
417 * next item or set of items. or why pow() is used the way it is.
419 * note that the original version 1 of this function made special
420 * accomodation for the case where straw lengths were identical. this
421 * is also flawed in a non-obvious way; version 2 drops the special
422 * handling and appears to work just as well.
424 * moral of the story: if you do something clever, write down why it
427 int crush_calc_straw(struct crush_map *map, struct crush_bucket_straw *bucket)
431 double straw, wbelow, lastw, wnext, pbelow;
433 int size = bucket->h.size;
434 __u32 *weights = bucket->item_weights;
436 /* reverse sort by weight (simple insertion sort) */
437 reverse = malloc(sizeof(int) * size);
442 for (i=1; i<size; i++) {
443 for (j=0; j<i; j++) {
444 if (weights[i] < weights[reverse[j]]) {
447 reverse[k] = reverse[k-1];
463 if (map->straw_calc_version == 0) {
464 /* zero weight items get 0 length straws! */
465 if (weights[reverse[i]] == 0) {
466 bucket->straws[reverse[i]] = 0;
471 /* set this item's straw */
472 bucket->straws[reverse[i]] = straw * 0x10000;
473 dprintk("item %d at %d weight %d straw %d (%lf)\n",
474 bucket->h.items[reverse[i]],
475 reverse[i], weights[reverse[i]],
476 bucket->straws[reverse[i]], straw);
481 /* same weight as previous? */
482 if (weights[reverse[i]] == weights[reverse[i-1]]) {
483 dprintk("same as previous\n");
487 /* adjust straw for next guy */
488 wbelow += ((double)weights[reverse[i-1]] - lastw) *
490 for (j=i; j<size; j++)
491 if (weights[reverse[j]] == weights[reverse[i]])
495 wnext = numleft * (weights[reverse[i]] -
496 weights[reverse[i-1]]);
497 pbelow = wbelow / (wbelow + wnext);
498 dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n",
499 wbelow, wnext, pbelow, numleft);
501 straw *= pow((double)1.0 / pbelow, (double)1.0 /
504 lastw = weights[reverse[i-1]];
505 } else if (map->straw_calc_version >= 1) {
506 /* zero weight items get 0 length straws! */
507 if (weights[reverse[i]] == 0) {
508 bucket->straws[reverse[i]] = 0;
514 /* set this item's straw */
515 bucket->straws[reverse[i]] = straw * 0x10000;
516 dprintk("item %d at %d weight %d straw %d (%lf)\n",
517 bucket->h.items[reverse[i]],
518 reverse[i], weights[reverse[i]],
519 bucket->straws[reverse[i]], straw);
524 /* adjust straw for next guy */
525 wbelow += ((double)weights[reverse[i-1]] - lastw) *
528 wnext = numleft * (weights[reverse[i]] -
529 weights[reverse[i-1]]);
530 pbelow = wbelow / (wbelow + wnext);
531 dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n",
532 wbelow, wnext, pbelow, numleft);
534 straw *= pow((double)1.0 / pbelow, (double)1.0 /
537 lastw = weights[reverse[i-1]];
545 struct crush_bucket_straw *
546 crush_make_straw_bucket(struct crush_map *map,
553 struct crush_bucket_straw *bucket;
556 bucket = malloc(sizeof(*bucket));
559 memset(bucket, 0, sizeof(*bucket));
560 bucket->h.alg = CRUSH_BUCKET_STRAW;
561 bucket->h.hash = hash;
562 bucket->h.type = type;
563 bucket->h.size = size;
565 bucket->h.items = malloc(sizeof(__s32)*size);
566 if (!bucket->h.items)
568 bucket->item_weights = malloc(sizeof(__u32)*size);
569 if (!bucket->item_weights)
571 bucket->straws = malloc(sizeof(__u32)*size);
575 bucket->h.weight = 0;
576 for (i=0; i<size; i++) {
577 bucket->h.items[i] = items[i];
578 bucket->h.weight += weights[i];
579 bucket->item_weights[i] = weights[i];
582 if (crush_calc_straw(map, bucket) < 0)
587 free(bucket->straws);
588 free(bucket->item_weights);
589 free(bucket->h.items);
594 struct crush_bucket_straw2 *
595 crush_make_straw2_bucket(struct crush_map *map,
602 struct crush_bucket_straw2 *bucket;
605 bucket = malloc(sizeof(*bucket));
608 memset(bucket, 0, sizeof(*bucket));
609 bucket->h.alg = CRUSH_BUCKET_STRAW2;
610 bucket->h.hash = hash;
611 bucket->h.type = type;
612 bucket->h.size = size;
614 bucket->h.items = malloc(sizeof(__s32)*size);
615 if (!bucket->h.items)
617 bucket->item_weights = malloc(sizeof(__u32)*size);
618 if (!bucket->item_weights)
621 bucket->h.weight = 0;
622 for (i=0; i<size; i++) {
623 bucket->h.items[i] = items[i];
624 bucket->h.weight += weights[i];
625 bucket->item_weights[i] = weights[i];
630 free(bucket->item_weights);
631 free(bucket->h.items);
639 crush_make_bucket(struct crush_map *map,
640 int alg, int hash, int type, int size,
647 case CRUSH_BUCKET_UNIFORM:
649 item_weight = weights[0];
652 return (struct crush_bucket *)crush_make_uniform_bucket(hash, type, size, items, item_weight);
654 case CRUSH_BUCKET_LIST:
655 return (struct crush_bucket *)crush_make_list_bucket(hash, type, size, items, weights);
657 case CRUSH_BUCKET_TREE:
658 return (struct crush_bucket *)crush_make_tree_bucket(hash, type, size, items, weights);
660 case CRUSH_BUCKET_STRAW:
661 return (struct crush_bucket *)crush_make_straw_bucket(map, hash, type, size, items, weights);
662 case CRUSH_BUCKET_STRAW2:
663 return (struct crush_bucket *)crush_make_straw2_bucket(map, hash, type, size, items, weights);
669 /************************************************/
671 int crush_add_uniform_bucket_item(struct crush_bucket_uniform *bucket, int item, int weight)
673 int newsize = bucket->h.size + 1;
674 void *_realloc = NULL;
676 /* In such situation 'CRUSH_BUCKET_UNIFORM', the weight
677 provided for the item should be the same as
678 bucket->item_weight defined with 'crush_make_bucket'. This
679 assumption is enforced by the return value which is always
681 if (bucket->item_weight != weight) {
685 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
688 bucket->h.items = _realloc;
691 bucket->h.items[newsize-1] = item;
693 if (crush_addition_is_unsafe(bucket->h.weight, weight))
696 bucket->h.weight += weight;
702 int crush_add_list_bucket_item(struct crush_bucket_list *bucket, int item, int weight)
704 int newsize = bucket->h.size + 1;
705 void *_realloc = NULL;
707 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
710 bucket->h.items = _realloc;
712 if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
715 bucket->item_weights = _realloc;
717 if ((_realloc = realloc(bucket->sum_weights, sizeof(__u32)*newsize)) == NULL) {
720 bucket->sum_weights = _realloc;
723 bucket->h.items[newsize-1] = item;
724 bucket->item_weights[newsize-1] = weight;
727 if (crush_addition_is_unsafe(bucket->sum_weights[newsize-2], weight))
730 bucket->sum_weights[newsize-1] = bucket->sum_weights[newsize-2] + weight;
734 bucket->sum_weights[newsize-1] = weight;
737 bucket->h.weight += weight;
742 int crush_add_tree_bucket_item(struct crush_bucket_tree *bucket, int item, int weight)
744 int newsize = bucket->h.size + 1;
745 int depth = calc_depth(newsize);;
748 void *_realloc = NULL;
750 bucket->num_nodes = 1 << depth;
752 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
755 bucket->h.items = _realloc;
757 if ((_realloc = realloc(bucket->node_weights, sizeof(__u32)*bucket->num_nodes)) == NULL) {
760 bucket->node_weights = _realloc;
763 node = crush_calc_tree_node(newsize-1);
764 bucket->node_weights[node] = weight;
766 /* if the depth increase, we need to initialize the new root node's weight before add bucket item */
767 int root = bucket->num_nodes/2;
768 if (depth >= 2 && (node - 1) == root) {
769 /* if the new item is the first node in right sub tree, so
770 * the root node initial weight is left sub tree's weight
772 bucket->node_weights[root] = bucket->node_weights[root/2];
775 for (j=1; j<depth; j++) {
778 if (crush_addition_is_unsafe(bucket->node_weights[node], weight))
781 bucket->node_weights[node] += weight;
782 dprintk(" node %d weight %d\n", node, bucket->node_weights[node]);
786 if (crush_addition_is_unsafe(bucket->h.weight, weight))
789 bucket->h.items[newsize-1] = item;
790 bucket->h.weight += weight;
796 int crush_add_straw_bucket_item(struct crush_map *map,
797 struct crush_bucket_straw *bucket,
798 int item, int weight)
800 int newsize = bucket->h.size + 1;
802 void *_realloc = NULL;
804 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
807 bucket->h.items = _realloc;
809 if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
812 bucket->item_weights = _realloc;
814 if ((_realloc = realloc(bucket->straws, sizeof(__u32)*newsize)) == NULL) {
817 bucket->straws = _realloc;
820 bucket->h.items[newsize-1] = item;
821 bucket->item_weights[newsize-1] = weight;
823 if (crush_addition_is_unsafe(bucket->h.weight, weight))
826 bucket->h.weight += weight;
829 return crush_calc_straw(map, bucket);
832 int crush_add_straw2_bucket_item(struct crush_map *map,
833 struct crush_bucket_straw2 *bucket,
834 int item, int weight)
836 int newsize = bucket->h.size + 1;
838 void *_realloc = NULL;
840 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
843 bucket->h.items = _realloc;
845 if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
848 bucket->item_weights = _realloc;
851 bucket->h.items[newsize-1] = item;
852 bucket->item_weights[newsize-1] = weight;
854 if (crush_addition_is_unsafe(bucket->h.weight, weight))
857 bucket->h.weight += weight;
863 int crush_bucket_add_item(struct crush_map *map,
864 struct crush_bucket *b, int item, int weight)
867 case CRUSH_BUCKET_UNIFORM:
868 return crush_add_uniform_bucket_item((struct crush_bucket_uniform *)b, item, weight);
869 case CRUSH_BUCKET_LIST:
870 return crush_add_list_bucket_item((struct crush_bucket_list *)b, item, weight);
871 case CRUSH_BUCKET_TREE:
872 return crush_add_tree_bucket_item((struct crush_bucket_tree *)b, item, weight);
873 case CRUSH_BUCKET_STRAW:
874 return crush_add_straw_bucket_item(map, (struct crush_bucket_straw *)b, item, weight);
875 case CRUSH_BUCKET_STRAW2:
876 return crush_add_straw2_bucket_item(map, (struct crush_bucket_straw2 *)b, item, weight);
882 /************************************************/
884 int crush_remove_uniform_bucket_item(struct crush_bucket_uniform *bucket, int item)
888 void *_realloc = NULL;
890 for (i = 0; i < bucket->h.size; i++)
891 if (bucket->h.items[i] == item)
893 if (i == bucket->h.size)
896 for (j = i; j < bucket->h.size; j++)
897 bucket->h.items[j] = bucket->h.items[j+1];
898 newsize = --bucket->h.size;
899 if (bucket->item_weight < bucket->h.weight)
900 bucket->h.weight -= bucket->item_weight;
902 bucket->h.weight = 0;
904 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
907 bucket->h.items = _realloc;
912 int crush_remove_list_bucket_item(struct crush_bucket_list *bucket, int item)
918 for (i = 0; i < bucket->h.size; i++)
919 if (bucket->h.items[i] == item)
921 if (i == bucket->h.size)
924 weight = bucket->item_weights[i];
925 for (j = i; j < bucket->h.size; j++) {
926 bucket->h.items[j] = bucket->h.items[j+1];
927 bucket->item_weights[j] = bucket->item_weights[j+1];
928 bucket->sum_weights[j] = bucket->sum_weights[j+1] - weight;
930 if (weight < bucket->h.weight)
931 bucket->h.weight -= weight;
933 bucket->h.weight = 0;
934 newsize = --bucket->h.size;
936 void *_realloc = NULL;
938 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
941 bucket->h.items = _realloc;
943 if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
946 bucket->item_weights = _realloc;
948 if ((_realloc = realloc(bucket->sum_weights, sizeof(__u32)*newsize)) == NULL) {
951 bucket->sum_weights = _realloc;
956 int crush_remove_tree_bucket_item(struct crush_bucket_tree *bucket, int item)
961 for (i = 0; i < bucket->h.size; i++) {
965 int depth = calc_depth(bucket->h.size);
967 if (bucket->h.items[i] != item)
970 bucket->h.items[i] = 0;
971 node = crush_calc_tree_node(i);
972 weight = bucket->node_weights[node];
973 bucket->node_weights[node] = 0;
975 for (j = 1; j < depth; j++) {
977 bucket->node_weights[node] -= weight;
978 dprintk(" node %d weight %d\n", node, bucket->node_weights[node]);
980 if (weight < bucket->h.weight)
981 bucket->h.weight -= weight;
983 bucket->h.weight = 0;
986 if (i == bucket->h.size)
989 newsize = bucket->h.size;
990 while (newsize > 0) {
991 int node = crush_calc_tree_node(newsize - 1);
992 if (bucket->node_weights[node])
997 if (newsize != bucket->h.size) {
998 int olddepth, newdepth;
1000 void *_realloc = NULL;
1002 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
1005 bucket->h.items = _realloc;
1008 olddepth = calc_depth(bucket->h.size);
1009 newdepth = calc_depth(newsize);
1010 if (olddepth != newdepth) {
1011 bucket->num_nodes = 1 << newdepth;
1012 if ((_realloc = realloc(bucket->node_weights,
1013 sizeof(__u32)*bucket->num_nodes)) == NULL) {
1016 bucket->node_weights = _realloc;
1020 bucket->h.size = newsize;
1025 int crush_remove_straw_bucket_item(struct crush_map *map,
1026 struct crush_bucket_straw *bucket, int item)
1028 int newsize = bucket->h.size - 1;
1031 for (i = 0; i < bucket->h.size; i++) {
1032 if (bucket->h.items[i] == item) {
1033 if (bucket->item_weights[i] < bucket->h.weight)
1034 bucket->h.weight -= bucket->item_weights[i];
1036 bucket->h.weight = 0;
1037 for (j = i; j < bucket->h.size - 1; j++) {
1038 bucket->h.items[j] = bucket->h.items[j+1];
1039 bucket->item_weights[j] = bucket->item_weights[j+1];
1044 if (i == bucket->h.size)
1047 if (bucket->h.size == 0) {
1048 /* don't bother reallocating */
1051 void *_realloc = NULL;
1053 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
1056 bucket->h.items = _realloc;
1058 if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
1061 bucket->item_weights = _realloc;
1063 if ((_realloc = realloc(bucket->straws, sizeof(__u32)*newsize)) == NULL) {
1066 bucket->straws = _realloc;
1069 return crush_calc_straw(map, bucket);
1072 int crush_remove_straw2_bucket_item(struct crush_map *map,
1073 struct crush_bucket_straw2 *bucket, int item)
1075 int newsize = bucket->h.size - 1;
1078 for (i = 0; i < bucket->h.size; i++) {
1079 if (bucket->h.items[i] == item) {
1080 if (bucket->item_weights[i] < bucket->h.weight)
1081 bucket->h.weight -= bucket->item_weights[i];
1083 bucket->h.weight = 0;
1084 for (j = i; j < bucket->h.size - 1; j++) {
1085 bucket->h.items[j] = bucket->h.items[j+1];
1086 bucket->item_weights[j] = bucket->item_weights[j+1];
1091 if (i == bucket->h.size)
1096 /* don't bother reallocating a 0-length array. */
1100 void *_realloc = NULL;
1102 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
1105 bucket->h.items = _realloc;
1107 if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
1110 bucket->item_weights = _realloc;
1116 int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *b, int item)
1119 case CRUSH_BUCKET_UNIFORM:
1120 return crush_remove_uniform_bucket_item((struct crush_bucket_uniform *)b, item);
1121 case CRUSH_BUCKET_LIST:
1122 return crush_remove_list_bucket_item((struct crush_bucket_list *)b, item);
1123 case CRUSH_BUCKET_TREE:
1124 return crush_remove_tree_bucket_item((struct crush_bucket_tree *)b, item);
1125 case CRUSH_BUCKET_STRAW:
1126 return crush_remove_straw_bucket_item(map, (struct crush_bucket_straw *)b, item);
1127 case CRUSH_BUCKET_STRAW2:
1128 return crush_remove_straw2_bucket_item(map, (struct crush_bucket_straw2 *)b, item);
1135 /************************************************/
1137 int crush_adjust_uniform_bucket_item_weight(struct crush_bucket_uniform *bucket, int item, int weight)
1139 int diff = (weight - bucket->item_weight) * bucket->h.size;
1141 bucket->item_weight = weight;
1142 bucket->h.weight = bucket->item_weight * bucket->h.size;
1147 int crush_adjust_list_bucket_item_weight(struct crush_bucket_list *bucket, int item, int weight)
1152 for (i = 0; i < bucket->h.size; i++) {
1153 if (bucket->h.items[i] == item)
1156 if (i == bucket->h.size)
1159 diff = weight - bucket->item_weights[i];
1160 bucket->item_weights[i] = weight;
1161 bucket->h.weight += diff;
1163 for (j = i; j < bucket->h.size; j++)
1164 bucket->sum_weights[j] += diff;
1169 int crush_adjust_tree_bucket_item_weight(struct crush_bucket_tree *bucket, int item, int weight)
1174 unsigned depth = calc_depth(bucket->h.size);
1176 for (i = 0; i < bucket->h.size; i++) {
1177 if (bucket->h.items[i] == item)
1180 if (i == bucket->h.size)
1183 node = crush_calc_tree_node(i);
1184 diff = weight - bucket->node_weights[node];
1185 bucket->node_weights[node] = weight;
1186 bucket->h.weight += diff;
1188 for (j=1; j<depth; j++) {
1189 node = parent(node);
1190 bucket->node_weights[node] += diff;
1196 int crush_adjust_straw_bucket_item_weight(struct crush_map *map,
1197 struct crush_bucket_straw *bucket,
1198 int item, int weight)
1204 for (idx = 0; idx < bucket->h.size; idx++)
1205 if (bucket->h.items[idx] == item)
1207 if (idx == bucket->h.size)
1210 diff = weight - bucket->item_weights[idx];
1211 bucket->item_weights[idx] = weight;
1212 bucket->h.weight += diff;
1214 r = crush_calc_straw(map, bucket);
1221 int crush_adjust_straw2_bucket_item_weight(struct crush_map *map,
1222 struct crush_bucket_straw2 *bucket,
1223 int item, int weight)
1228 for (idx = 0; idx < bucket->h.size; idx++)
1229 if (bucket->h.items[idx] == item)
1231 if (idx == bucket->h.size)
1234 diff = weight - bucket->item_weights[idx];
1235 bucket->item_weights[idx] = weight;
1236 bucket->h.weight += diff;
1241 int crush_bucket_adjust_item_weight(struct crush_map *map,
1242 struct crush_bucket *b,
1243 int item, int weight)
1246 case CRUSH_BUCKET_UNIFORM:
1247 return crush_adjust_uniform_bucket_item_weight((struct crush_bucket_uniform *)b,
1249 case CRUSH_BUCKET_LIST:
1250 return crush_adjust_list_bucket_item_weight((struct crush_bucket_list *)b,
1252 case CRUSH_BUCKET_TREE:
1253 return crush_adjust_tree_bucket_item_weight((struct crush_bucket_tree *)b,
1255 case CRUSH_BUCKET_STRAW:
1256 return crush_adjust_straw_bucket_item_weight(map,
1257 (struct crush_bucket_straw *)b,
1259 case CRUSH_BUCKET_STRAW2:
1260 return crush_adjust_straw2_bucket_item_weight(map,
1261 (struct crush_bucket_straw2 *)b,
1268 /************************************************/
1270 static int crush_reweight_uniform_bucket(struct crush_map *map, struct crush_bucket_uniform *bucket)
1273 unsigned sum = 0, n = 0, leaves = 0;
1275 for (i = 0; i < bucket->h.size; i++) {
1276 int id = bucket->h.items[i];
1278 struct crush_bucket *c = map->buckets[-1-id];
1279 crush_reweight_bucket(map, c);
1281 if (crush_addition_is_unsafe(sum, c->weight))
1292 bucket->item_weight = sum / n; // more bucket children than leaves, average!
1293 bucket->h.weight = bucket->item_weight * bucket->h.size;
1298 static int crush_reweight_list_bucket(struct crush_map *map, struct crush_bucket_list *bucket)
1302 bucket->h.weight = 0;
1303 for (i = 0; i < bucket->h.size; i++) {
1304 int id = bucket->h.items[i];
1306 struct crush_bucket *c = map->buckets[-1-id];
1307 crush_reweight_bucket(map, c);
1308 bucket->item_weights[i] = c->weight;
1311 if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i]))
1314 bucket->h.weight += bucket->item_weights[i];
1320 static int crush_reweight_tree_bucket(struct crush_map *map, struct crush_bucket_tree *bucket)
1324 bucket->h.weight = 0;
1325 for (i = 0; i < bucket->h.size; i++) {
1326 int node = crush_calc_tree_node(i);
1327 int id = bucket->h.items[i];
1329 struct crush_bucket *c = map->buckets[-1-id];
1330 crush_reweight_bucket(map, c);
1331 bucket->node_weights[node] = c->weight;
1334 if (crush_addition_is_unsafe(bucket->h.weight, bucket->node_weights[node]))
1337 bucket->h.weight += bucket->node_weights[node];
1345 static int crush_reweight_straw_bucket(struct crush_map *map, struct crush_bucket_straw *bucket)
1349 bucket->h.weight = 0;
1350 for (i = 0; i < bucket->h.size; i++) {
1351 int id = bucket->h.items[i];
1353 struct crush_bucket *c = map->buckets[-1-id];
1354 crush_reweight_bucket(map, c);
1355 bucket->item_weights[i] = c->weight;
1358 if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i]))
1361 bucket->h.weight += bucket->item_weights[i];
1363 crush_calc_straw(map, bucket);
1368 static int crush_reweight_straw2_bucket(struct crush_map *map, struct crush_bucket_straw2 *bucket)
1372 bucket->h.weight = 0;
1373 for (i = 0; i < bucket->h.size; i++) {
1374 int id = bucket->h.items[i];
1376 struct crush_bucket *c = map->buckets[-1-id];
1377 crush_reweight_bucket(map, c);
1378 bucket->item_weights[i] = c->weight;
1381 if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i]))
1384 bucket->h.weight += bucket->item_weights[i];
1390 int crush_reweight_bucket(struct crush_map *map, struct crush_bucket *b)
1393 case CRUSH_BUCKET_UNIFORM:
1394 return crush_reweight_uniform_bucket(map, (struct crush_bucket_uniform *)b);
1395 case CRUSH_BUCKET_LIST:
1396 return crush_reweight_list_bucket(map, (struct crush_bucket_list *)b);
1397 case CRUSH_BUCKET_TREE:
1398 return crush_reweight_tree_bucket(map, (struct crush_bucket_tree *)b);
1399 case CRUSH_BUCKET_STRAW:
1400 return crush_reweight_straw_bucket(map, (struct crush_bucket_straw *)b);
1401 case CRUSH_BUCKET_STRAW2:
1402 return crush_reweight_straw2_bucket(map, (struct crush_bucket_straw2 *)b);
1408 struct crush_choose_arg *crush_make_choose_args(struct crush_map *map, int num_positions)
1411 int sum_bucket_size = 0;
1412 int bucket_count = 0;
1413 for (b = 0; b < map->max_buckets; b++) {
1414 if (map->buckets[b] == 0)
1416 sum_bucket_size += map->buckets[b]->size;
1419 dprintk("sum_bucket_size %d max_buckets %d bucket_count %d\n",
1420 sum_bucket_size, map->max_buckets, bucket_count);
1421 int size = (sizeof(struct crush_choose_arg) * map->max_buckets +
1422 sizeof(struct crush_weight_set) * bucket_count * num_positions +
1423 sizeof(__u32) * sum_bucket_size * num_positions + // weights
1424 sizeof(__s32) * sum_bucket_size); // ids
1425 char *space = malloc(size);
1426 struct crush_choose_arg *arg = (struct crush_choose_arg *)space;
1427 struct crush_weight_set *weight_set = (struct crush_weight_set *)(arg + map->max_buckets);
1428 __u32 *weights = (__u32 *)(weight_set + bucket_count * num_positions);
1429 char *weight_set_ends = (char*)weights;
1430 __s32 *ids = (__s32 *)(weights + sum_bucket_size * num_positions);
1431 char *weights_end = (char *)ids;
1432 char *ids_end = (char *)(ids + sum_bucket_size);
1433 BUG_ON(space + size != ids_end);
1434 for (b = 0; b < map->max_buckets; b++) {
1435 if (map->buckets[b] == 0) {
1436 memset(&arg[b], '\0', sizeof(struct crush_choose_arg));
1439 struct crush_bucket_straw2 *bucket = (struct crush_bucket_straw2 *)map->buckets[b];
1442 for (position = 0; position < num_positions; position++) {
1443 memcpy(weights, bucket->item_weights, sizeof(__u32) * bucket->h.size);
1444 weight_set[position].weights = weights;
1445 weight_set[position].size = bucket->h.size;
1446 dprintk("moving weight %d bytes forward\n", (int)((weights + bucket->h.size) - weights));
1447 weights += bucket->h.size;
1449 arg[b].weight_set = weight_set;
1450 arg[b].weight_set_size = num_positions;
1451 weight_set += position;
1453 memcpy(ids, bucket->h.items, sizeof(__s32) * bucket->h.size);
1455 arg[b].ids_size = bucket->h.size;
1456 ids += bucket->h.size;
1458 BUG_ON((char*)weight_set_ends != (char*)weight_set);
1459 BUG_ON((char*)weights_end != (char*)weights);
1460 BUG_ON((char*)ids != (char*)ids_end);
1464 void crush_destroy_choose_args(struct crush_choose_arg *args)
1469 /***************************/
1471 /* methods to check for safe arithmetic operations */
1473 int crush_addition_is_unsafe(__u32 a, __u32 b)
1475 if ((((__u32)(-1)) - b) < a)
1481 int crush_multiplication_is_unsafe(__u32 a, __u32 b)
1483 /* prevent division by zero */
1488 if ((((__u32)(-1)) / b) < a)
1494 /***************************/
1496 /* methods to configure crush_map */
1498 void set_legacy_crush_map(struct crush_map *map) {
1499 /* initialize legacy tunable values */
1500 map->choose_local_tries = 2;
1501 map->choose_local_fallback_tries = 5;
1502 map->choose_total_tries = 19;
1503 map->chooseleaf_descend_once = 0;
1504 map->chooseleaf_vary_r = 0;
1505 map->chooseleaf_stable = 0;
1506 map->straw_calc_version = 0;
1508 // by default, use legacy types, and also exclude tree,
1509 // since it was buggy.
1510 map->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
1513 void set_optimal_crush_map(struct crush_map *map) {
1514 map->choose_local_tries = 0;
1515 map->choose_local_fallback_tries = 0;
1516 map->choose_total_tries = 50;
1517 map->chooseleaf_descend_once = 1;
1518 map->chooseleaf_vary_r = 1;
1519 map->chooseleaf_stable = 1;
1520 map->allowed_bucket_algs = (
1521 (1 << CRUSH_BUCKET_UNIFORM) |
1522 (1 << CRUSH_BUCKET_LIST) |
1523 (1 << CRUSH_BUCKET_STRAW) |
1524 (1 << CRUSH_BUCKET_STRAW2));