// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Bitmap based in-memory allocator.
 * Author: Ramesh Chander, Ramesh.Chander@sandisk.com
 *
 * Storage is divided into bitmaps of blocks. Each bitmap is the size of an
 * unsigned long. A group of bitmaps forms a Zone. A Zone is the unit within
 * which only a single thread can be active at a time; it also bounds the
 * largest contiguous allocation that can be requested.
 *
 * The remaining nodes are classified into three categories:
 *   the root node, or Allocator
 *   internal nodes, or BitMapAreaIN
 *   final nodes that contain Zones, called BitMapAreaLeaf
 * This classification is according to their own implementation of some
 * of the interfaces defined in BitMapArea.
 */
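/*
 * Illustrative layout (not from the original source; the sizes are just an
 * example): with 1024 blocks per zone, a 64-bit BmapEntry and a span size
 * of 1024, the tree looks like:
 *
 *   BitAllocator (root, a BitMapAreaIN)
 *   +-- BitMapAreaLeaf
 *   |   +-- BitMapZone (1024 blocks = 16 BmapEntry bitmaps)
 *   |   +-- BitMapZone
 *   |   ...
 *   +-- BitMapAreaLeaf
 *       ...
 */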
#include "BitAllocator.h"

#include "bluestore_types.h"
#include "common/debug.h"

#define dout_context cct
#define dout_subsys ceph_subsys_bluestore

#define dout_prefix *_dout << "bitalloc:"

MEMPOOL_DEFINE_OBJECT_FACTORY(BitMapArea, BitMapArea, bluestore_alloc);
MEMPOOL_DEFINE_OBJECT_FACTORY(BitMapAreaIN, BitMapAreaIN, bluestore_alloc);
MEMPOOL_DEFINE_OBJECT_FACTORY(BitMapAreaLeaf, BitMapAreaLeaf, bluestore_alloc);
MEMPOOL_DEFINE_OBJECT_FACTORY(BitMapZone, BitMapZone, bluestore_alloc);
MEMPOOL_DEFINE_OBJECT_FACTORY(BmapEntry, BmapEntry, bluestore_alloc);
MEMPOOL_DEFINE_OBJECT_FACTORY(BitAllocator, BitAllocator, bluestore_alloc);

int64_t BitMapAreaLeaf::count = 0;
int64_t BitMapZone::count = 0;
int64_t BitMapZone::total_blocks = 0;
int64_t BmapEntityListIter::index()

BmapEntry::BmapEntry(CephContext*, const bool full)
{
  if (full) {
    m_bits = BmapEntry::full_bmask();
  } else {
    m_bits = BmapEntry::empty_bmask();
  }
}

BmapEntry::~BmapEntry()
{
}

bool BmapEntry::check_bit(int bit)
{
  return (atomic_fetch() & bit_mask(bit));
}
bool BmapEntry::is_allocated(int64_t offset, int64_t num_bits)
{
  bmap_t bmask = BmapEntry::align_mask(num_bits) >> offset;
  return ((m_bits & bmask) == bmask);
}
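/*
 * Worked example (illustrative, not part of the original source; it assumes
 * a 64-bit bmap_t where bit 0 is the most significant bit):
 * align_mask(3) == 0xe000000000000000, so for offset == 2 the mask covers
 * bits 2..4:
 *
 *   bmap_t bmask = BmapEntry::align_mask(3) >> 2;  // 0x3800000000000000
 *   bool full = ((m_bits & bmask) == bmask);       // all three bits set?
 */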
void BmapEntry::clear_bit(int bit)
{
  bmap_t bmask = bit_mask(bit);
  m_bits &= ~(bmask);
}

void BmapEntry::clear_bits(int offset, int num_bits)
{
  if (num_bits == 0) {
    return;
  }

  bmap_t bmask = BmapEntry::align_mask(num_bits) >> offset;
  m_bits &= ~(bmask);
}

void BmapEntry::set_bits(int offset, int num_bits)
{
  if (num_bits == 0) {
    return;
  }

  bmap_t bmask = BmapEntry::align_mask(num_bits) >> offset;
  m_bits |= bmask;
}

/*
 * Allocate a bit if it was free.
 * Returns true if it was free.
 */
bool BmapEntry::check_n_set_bit(int bit)
{
  bmap_t bmask = bit_mask(bit);
  bool res = !(m_bits & bmask);
  if (res) {
    m_bits |= bmask;
  }
  return res;
}
/*
 * Find N contiguous free bits in the bitmap, starting from an offset.
 *
 * Returns the number of contiguous bits found.
 */
int BmapEntry::find_n_cont_bits(int start_offset, int64_t num_bits)
{
  int count = 0;
  int i = 0;

  if (start_offset >= BmapEntry::size()) {
    return 0;
  }

  for (i = start_offset; i < BmapEntry::size() && count < num_bits; i++) {
    if (!check_n_set_bit(i)) {
      break;
    }
    count++;
  }

  return count;
}

/*
 * Find N free bits, starting the search from a given offset.
 *
 * Returns the number of bits found, the start bit, and the index one past
 * the bit where the search ended.
 */
int BmapEntry::find_n_free_bits(int start_idx, int64_t max_bits,
                                int *free_bit, int *end_idx)
{
  int i = 0;
  int count = 0;

  alloc_assert(max_bits > 0);

  /*
   * Find a free bit and return the bit number in free_bit.
   */
  if (atomic_fetch() == BmapEntry::full_bmask()) {
    /*
     * All bits are taken; return failure.
     */
    *end_idx = BmapEntry::size();
    return 0;
  }

  /*
   * Do a serial scan of the bitmap.
   */
  for (i = start_idx; i < BmapEntry::size(); i++) {
    if (check_n_set_bit(i)) {
      /*
       * Found the first free bit.
       */
      *free_bit = i;
      count++;
      break;
    }
  }
  count += find_n_cont_bits(i + 1, max_bits - 1);

  (*end_idx) = i + count;
  return count;
}
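/*
 * Usage sketch (illustrative only, not part of the original source): with
 * m_bits == 1100...b, scanning from index 0 with max_bits == 4 allocates
 * the run that starts at bit 2:
 *
 *   int free_bit, end_idx;
 *   int got = bmap.find_n_free_bits(0, 4, &free_bit, &end_idx);
 *   // got == 4, free_bit == 2, end_idx == 6
 */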
/*
 * Find the first series of contiguous free bits in the bitmap, starting
 * from the start offset, that either satisfies our need or touches the
 * right edge of the bitmap.
 *
 * Returns the allocated bits, the start bit of the allocation, and the
 * number of bits scanned from the start offset.
 */
int
BmapEntry::find_first_set_bits(int64_t required_blocks,
                               int bit_offset, int *start_offset,
                               int64_t *scanned)
{
  int conti = 0;
  int end_idx = 0;

  *scanned = 0;

  while (bit_offset < BmapEntry::size()) {
    conti = find_n_free_bits(bit_offset, required_blocks,
                             start_offset, &end_idx);

    *scanned += end_idx - bit_offset;
    /*
     * Either we reached the end of the bitmap or we got the required
     * blocks.
     */
    if (conti == required_blocks ||
        (conti + *start_offset == BmapEntry::size())) {
      break;
    }

    /*
     * Did not get the expected count; search again from the next index.
     */
    clear_bits(*start_offset, conti);

    bit_offset = end_idx;
  }

  return conti;
}
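/*
 * Usage sketch (illustrative, not part of the original source): try to
 * take 4 contiguous blocks from this entry, scanning from bit 0:
 *
 *   int start = 0;
 *   int64_t scanned = 0;
 *   int got = bmap.find_first_set_bits(4, 0, &start, &scanned);
 *   // got <= 4 bits are now set, beginning at 'start'
 */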
void BmapEntry::dump_state(CephContext* const cct, const int& count)
{
  dout(0) << count << ":: 0x" << std::hex << m_bits << std::dec << dendl;
}

/*
 * Zone-related functions.
 */
void BitMapZone::init(CephContext* const cct,
                      const int64_t zone_num,
                      const int64_t total_blocks,
                      const bool def)
{
  m_area_index = zone_num;
  BitMapZone::total_blocks = total_blocks;
  alloc_assert(size() > 0);

  m_used_blocks = def ? total_blocks : 0;

  int64_t num_bmaps = total_blocks / BmapEntry::size();
  alloc_assert(num_bmaps < std::numeric_limits<int16_t>::max());
  alloc_assert(total_blocks < std::numeric_limits<int32_t>::max());
  alloc_assert(!(total_blocks % BmapEntry::size()));

  m_bmap_vec.resize(num_bmaps, BmapEntry(cct, def));
}
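/* Example (illustrative, assuming the default of 1024 blocks per zone and
 * a 64-bit BmapEntry): num_bmaps == 1024 / 64 == 16, so the zone is backed
 * by a vector of 16 bitmap words. */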
int64_t BitMapZone::sub_used_blocks(int64_t num_blocks)
{
  return std::atomic_fetch_sub(&m_used_blocks, (int32_t) num_blocks);
}

int64_t BitMapZone::add_used_blocks(int64_t num_blocks)
{
  return std::atomic_fetch_add(&m_used_blocks, (int32_t) num_blocks) + num_blocks;
}

/* Intentionally hinted because of BitMapAreaLeaf::child_check_n_lock. */
inline int64_t BitMapZone::get_used_blocks()
{
  return std::atomic_load(&m_used_blocks);
}

bool BitMapZone::reserve_blocks(int64_t num_blocks)
{
  ceph_abort();
  return false;
}

void BitMapZone::unreserve(int64_t num_blocks, int64_t allocated)
{
  ceph_abort();
}

int64_t BitMapZone::get_reserved_blocks()
{
  ceph_abort();
  return 0;
}

BitMapZone::BitMapZone(CephContext* cct, int64_t total_blocks,
                       int64_t zone_num)
  : BitMapArea(cct)
{
  init(cct, zone_num, total_blocks, false);
}

BitMapZone::BitMapZone(CephContext* cct, int64_t total_blocks,
                       int64_t zone_num, bool def)
  : BitMapArea(cct)
{
  init(cct, zone_num, total_blocks, def);
}

void BitMapZone::shutdown()
{
}

BitMapZone::~BitMapZone()
{
}

/*
 * Check if some search took the zone marker to the end.
 *
 * The inline hint has been added intentionally because of the importance
 * of this method for BitMapAreaLeaf::child_check_n_lock, and thus for the
 * overall allocator's performance. Examination of disassemblies coming
 * from GCC 5.4.0 showed that the compiler really needs that hint.
 */
inline bool BitMapZone::is_exhausted()
{
  /* BitMapZone::get_used_blocks operates atomically. No need for a lock. */
  return BitMapZone::get_used_blocks() == BitMapZone::size();
}
bool BitMapZone::is_allocated(int64_t start_block, int64_t num_blocks)
{
  BmapEntry *bmap = NULL;
  int bit = 0;
  int64_t falling_in_bmap = 0;

  while (num_blocks) {
    bit = start_block % BmapEntry::size();
    bmap = &m_bmap_vec[start_block / BmapEntry::size()];
    falling_in_bmap = MIN(num_blocks, BmapEntry::size() - bit);

    if (!bmap->is_allocated(bit, falling_in_bmap)) {
      return false;
    }

    start_block += falling_in_bmap;
    num_blocks -= falling_in_bmap;
  }
  return true;
}

void BitMapZone::set_blocks_used(int64_t start_block, int64_t num_blocks)
{
  BmapEntry *bmap = NULL;
  int bit = 0;
  int64_t falling_in_bmap = 0;
  int64_t blks = num_blocks;

  while (blks) {
    bit = start_block % BmapEntry::size();
    bmap = &m_bmap_vec[start_block / BmapEntry::size()];
    falling_in_bmap = MIN(blks, BmapEntry::size() - bit);

    bmap->set_bits(bit, falling_in_bmap);

    start_block += falling_in_bmap;
    blks -= falling_in_bmap;
  }
  add_used_blocks(num_blocks);
}
void BitMapZone::free_blocks_int(int64_t start_block, int64_t num_blocks)
{
  BmapEntry *bmap = NULL;
  int bit = 0;
  int64_t falling_in_bmap = 0;
  int64_t count = num_blocks;
  int64_t first_blk = start_block;

  if (num_blocks == 0) {
    return;
  }
  alloc_dbg_assert(is_allocated(start_block, num_blocks));

  while (count) {
    bit = first_blk % BmapEntry::size();
    bmap = &m_bmap_vec[first_blk / BmapEntry::size()];
    falling_in_bmap = MIN(count, BmapEntry::size() - bit);

    bmap->clear_bits(bit, falling_in_bmap);

    first_blk += falling_in_bmap;
    count -= falling_in_bmap;
  }
  alloc_dbg_assert(!is_allocated(start_block, num_blocks));
}

void BitMapZone::lock_excl()
{
  m_lock.lock();
}

bool BitMapZone::lock_excl_try()
{
  return m_lock.try_lock();
}

void BitMapZone::unlock()
{
  m_lock.unlock();
}

bool BitMapZone::check_locked()
{
  return !lock_excl_try();
}

void BitMapZone::free_blocks(int64_t start_block, int64_t num_blocks)
{
  free_blocks_int(start_block, num_blocks);
  sub_used_blocks(num_blocks);
  alloc_assert(get_used_blocks() >= 0);
}
int64_t BitMapZone::alloc_blocks_dis(int64_t num_blocks,
                                     int64_t min_alloc,
                                     int64_t hint,
                                     int64_t zone_blk_off,
                                     ExtentList *alloc_blocks)
{
  int64_t bmap_idx = hint / BmapEntry::size();
  int bit = hint % BmapEntry::size();
  BmapEntry *bmap = NULL;
  int64_t allocated = 0;
  int64_t blk_off = 0;
  int64_t alloc_cont = 0;
  int64_t last_cont = 0;
  int64_t last_running_ext = 0;
  int search_idx = bit;
  int64_t scanned = 0;
  int start_off = 0;

  alloc_assert(check_locked());

  BitMapEntityIter <BmapEntry> iter = BitMapEntityIter<BmapEntry>(
    &m_bmap_vec, bmap_idx);

  bmap = iter.next();
  if (!bmap) {
    return 0;
  }

  while (allocated < num_blocks) {
    blk_off = zone_blk_off + bmap_idx * bmap->size();
    if (last_cont) {
      /*
       * We had free bits at the end of the last bitmap; try to complete
       * the required min_alloc size using them.
       */
      alloc_cont = bmap->find_n_cont_bits(0, min_alloc - last_cont);
      allocated += alloc_cont;
      last_cont += alloc_cont;

      if (!alloc_cont) {
        if (last_cont) {
          this->free_blocks_int(last_running_ext - zone_blk_off, last_cont);
        }
        allocated -= last_cont;
        last_cont = 0;
      } else if (last_cont / min_alloc) {
        /*
         * Got a contiguous min_alloc_size run across bitmaps.
         */
        alloc_blocks->add_extents(last_running_ext, last_cont);
        last_cont = 0;
        last_running_ext = 0;
      }
      search_idx = alloc_cont;
    } else {
      /*
       * Try to allocate min_alloc_size bits from the given bmap.
       */
      alloc_cont = bmap->find_first_set_bits(min_alloc, search_idx, &start_off, &scanned);
      search_idx = search_idx + scanned;
      allocated += alloc_cont;
      if (alloc_cont / min_alloc) {
        /*
         * Got a contiguous min_alloc_size run within a bitmap.
         */
        alloc_blocks->add_extents(blk_off + start_off, min_alloc);
      }

      if (alloc_cont % min_alloc) {
        /*
         * Got some bits at the end of the bitmap; carry them over to try
         * to match them with the start bits of the next bitmap.
         */
        last_running_ext = blk_off + start_off;
        last_cont += alloc_cont % min_alloc;
      }
    }

    if (search_idx == BmapEntry::size()) {
      search_idx = 0;
      bmap_idx = iter.index();
      if ((bmap = iter.next()) == NULL) {
        if (last_cont) {
          this->free_blocks_int(last_running_ext - zone_blk_off, last_cont);
        }
        allocated -= last_cont;
        break;
      }
    }
  }

  add_used_blocks(allocated);
  return allocated;
}
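/*
 * Usage sketch (illustrative, not part of the original source; ExtentList
 * and AllocExtentVector come from bluestore_types.h, and the exact
 * ExtentList constructor arguments are an assumption). The zone lock must
 * already be held, per the check_locked() assert above:
 *
 *   AllocExtentVector extents;
 *   ExtentList block_list(&extents, 1);  // track extents at block granularity
 *   int64_t got = zone.alloc_blocks_dis(16, 1, 0, 0, &block_list);
 */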
void BitMapZone::dump_state(CephContext* const cct, int& count)
{
  BmapEntry *bmap = NULL;
  int bmap_idx = 0;
  BitMapEntityIter <BmapEntry> iter = BitMapEntityIter<BmapEntry>(
    &m_bmap_vec, 0);
  dout(0) << __func__ << " zone " << count << " dump start " << dendl;
  while ((bmap = static_cast<BmapEntry *>(iter.next()))) {
    bmap->dump_state(cct, bmap_idx);
    bmap_idx++;
  }
  dout(0) << __func__ << " zone " << count << " dump end " << dendl;
  count++;
}
/*
 * BitMapArea Leaf and non-Leaf functions.
 */
int64_t BitMapArea::get_zone_size(CephContext* cct)
{
  return cct->_conf->bluestore_bitmapallocator_blocks_per_zone;
}

int64_t BitMapArea::get_span_size(CephContext* cct)
{
  return cct->_conf->bluestore_bitmapallocator_span_size;
}

int BitMapArea::get_level(CephContext* cct, int64_t total_blocks)
{
  int level = 1;
  int64_t zone_size_block = get_zone_size(cct);
  int64_t span_size = get_span_size(cct);
  int64_t spans = zone_size_block * span_size;
  while (spans < total_blocks) {
    spans *= span_size;
    level++;
  }
  return level;
}

int64_t BitMapArea::get_level_factor(CephContext* cct, int level)
{
  alloc_assert(level > 0);

  int64_t zone_size = get_zone_size(cct);
  if (level == 1) {
    return zone_size;
  }

  int64_t level_factor = zone_size;
  int64_t span_size = get_span_size(cct);
  while (--level) {
    level_factor *= span_size;
  }

  return level_factor;
}
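/*
 * Worked example (illustrative, assuming a zone size of 1024 blocks and a
 * span size of 1024): level_factor == zone_size * span_size^(level - 1), so
 *   level 1 -> 1024 blocks (one zone)
 *   level 2 -> 1024 * 1024 = 2^20 blocks (one full leaf)
 * A device of 2^30 blocks thus gets a level-2 root whose 1024 leaf
 * children each span 2^20 blocks.
 */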
int64_t BitMapArea::get_index()
{
  return m_area_index;
}

/*
 * BitMapArea Leaf and Internal.
 */
BitMapAreaIN::BitMapAreaIN(CephContext* cct)
  : BitMapArea(cct)
{
}

void BitMapAreaIN::init_common(CephContext* const cct,
                               const int64_t total_blocks,
                               const int64_t area_idx,
                               const bool def)
{
  m_area_index = area_idx;
  m_total_blocks = total_blocks;
  m_level = BitMapArea::get_level(cct, total_blocks);
  m_reserved_blocks = 0;

  m_used_blocks = def ? total_blocks : 0;
}

void BitMapAreaIN::init(CephContext* const cct,
                        int64_t total_blocks,
                        const int64_t area_idx,
                        const bool def)
{
  int64_t num_child = 0;
  alloc_assert(!(total_blocks % BmapEntry::size()));

  init_common(cct, total_blocks, area_idx, def);
  int64_t level_factor = BitMapArea::get_level_factor(cct, m_level);

  num_child = (total_blocks + level_factor - 1) / level_factor;
  alloc_assert(num_child < std::numeric_limits<int16_t>::max());

  m_child_size_blocks = level_factor;

  std::vector<BitMapArea*> children;
  children.reserve(num_child);
  int i = 0;
  for (i = 0; i < num_child - 1; i++) {
    if (m_level <= 2) {
      children.push_back(new BitMapAreaLeaf(cct, m_child_size_blocks, i, def));
    } else {
      children.push_back(new BitMapAreaIN(cct, m_child_size_blocks, i, def));
    }
    total_blocks -= m_child_size_blocks;
  }

  int last_level = BitMapArea::get_level(cct, total_blocks);
  if (last_level == 1) {
    children.push_back(new BitMapAreaLeaf(cct, total_blocks, i, def));
  } else {
    children.push_back(new BitMapAreaIN(cct, total_blocks, i, def));
  }
  m_child_list = BitMapAreaList(std::move(children));
}
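/*
 * Example (illustrative): if total_blocks == 2.5 * level_factor, then
 * num_child == 3; the loop above creates two full-size children and the
 * last child gets the remaining 0.5 * level_factor blocks.
 */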
BitMapAreaIN::BitMapAreaIN(CephContext* cct, int64_t total_blocks,
                           int64_t area_idx)
  : BitMapArea(cct)
{
  init(cct, total_blocks, area_idx, false);
}

BitMapAreaIN::BitMapAreaIN(CephContext* cct, int64_t total_blocks,
                           int64_t area_idx, bool def)
  : BitMapArea(cct)
{
  init(cct, total_blocks, area_idx, def);
}

BitMapAreaIN::~BitMapAreaIN()
{
}

void BitMapAreaIN::shutdown()

bool BitMapAreaIN::child_check_n_lock(BitMapArea *child, int64_t required)
{
  child->lock_shared();

  if (child->is_exhausted()) {
    child->unlock();
    return false;
  }

  int64_t child_used_blocks = child->get_used_blocks();
  int64_t child_total_blocks = child->size();
  if ((child_total_blocks - child_used_blocks) < required) {
    child->unlock();
    return false;
  }

  return true;
}

void BitMapAreaIN::child_unlock(BitMapArea *child)
{
  child->unlock();
}

bool BitMapAreaIN::is_exhausted()
{
  return get_used_blocks() == size();
}

int64_t BitMapAreaIN::add_used_blocks(int64_t blks)
{
  std::lock_guard<std::mutex> l(m_blocks_lock);
  m_used_blocks += blks;
  return m_used_blocks;
}

int64_t BitMapAreaIN::sub_used_blocks(int64_t num_blocks)
{
  std::lock_guard<std::mutex> l(m_blocks_lock);

  int64_t used_blks = m_used_blocks;
  m_used_blocks -= num_blocks;
  alloc_assert(m_used_blocks >= 0);
  return used_blks;
}

int64_t BitMapAreaIN::get_used_blocks()
{
  std::lock_guard<std::mutex> l(m_blocks_lock);
  return m_used_blocks;
}

int64_t BitMapAreaIN::get_used_blocks_adj()
{
  std::lock_guard<std::mutex> l(m_blocks_lock);
  return m_used_blocks - m_reserved_blocks;
}
bool BitMapAreaIN::reserve_blocks(int64_t num)
{
  bool res = false;
  std::lock_guard<std::mutex> u_l(m_blocks_lock);
  if (m_used_blocks + num <= size()) {
    m_used_blocks += num;
    m_reserved_blocks += num;
    res = true;
  }
  alloc_assert(m_used_blocks <= size());
  return res;
}

void BitMapAreaIN::unreserve(int64_t needed, int64_t allocated)
{
  std::lock_guard<std::mutex> l(m_blocks_lock);
  m_used_blocks -= (needed - allocated);
  m_reserved_blocks -= needed;
  alloc_assert(m_used_blocks >= 0);
  alloc_assert(m_reserved_blocks >= 0);
}
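/*
 * Reservation protocol sketch (illustrative, not part of the original
 * source): callers reserve up front, allocate what they can, and then
 * return the shortfall, as BitAllocator::alloc_blocks_dis_work does below:
 *
 *   if (area.reserve_blocks(need)) {
 *     int64_t got = ...;          // allocate up to 'need' blocks
 *     area.unreserve(need, got);  // give back the unallocated remainder
 *   }
 */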
int64_t BitMapAreaIN::get_reserved_blocks()
{
  std::lock_guard<std::mutex> l(m_blocks_lock);
  return m_reserved_blocks;
}

bool BitMapAreaIN::is_allocated(int64_t start_block, int64_t num_blocks)
{
  BitMapArea *area = NULL;
  int64_t area_block_offset = 0;
  int64_t falling_in_area = 0;

  alloc_assert(start_block >= 0 &&
               (start_block + num_blocks <= size()));

  if (num_blocks == 0) {
    return true;
  }

  while (num_blocks) {
    area = static_cast<BitMapArea *>(m_child_list.get_nth_item(
      start_block / m_child_size_blocks));

    area_block_offset = start_block % m_child_size_blocks;
    falling_in_area = MIN(m_child_size_blocks - area_block_offset,
                          num_blocks);
    if (!area->is_allocated(area_block_offset, falling_in_area)) {
      return false;
    }
    start_block += falling_in_area;
    num_blocks -= falling_in_area;
  }
  return true;
}
int64_t BitMapAreaIN::alloc_blocks_dis_int_work(bool wrap, int64_t num_blocks, int64_t min_alloc,
                                                int64_t hint, int64_t area_blk_off, ExtentList *block_list)
{
  BitMapArea *child = NULL;
  int64_t allocated = 0;
  int64_t blk_off = 0;

  BmapEntityListIter iter = BmapEntityListIter(
    &m_child_list, hint / m_child_size_blocks, wrap);

  while ((child = static_cast<BitMapArea *>(iter.next()))) {
    if (!child_check_n_lock(child, 1)) {
      hint = 0;
      continue;
    }

    blk_off = child->get_index() * m_child_size_blocks + area_blk_off;
    allocated += child->alloc_blocks_dis(num_blocks - allocated, min_alloc,
                                         hint % m_child_size_blocks, blk_off, block_list);
    hint = 0;
    child_unlock(child);
    if (allocated == num_blocks) {
      break;
    }
  }

  return allocated;
}

int64_t BitMapAreaIN::alloc_blocks_dis_int(int64_t num_blocks, int64_t min_alloc,
                                           int64_t hint, int64_t area_blk_off, ExtentList *block_list)
{
  return alloc_blocks_dis_int_work(false, num_blocks, min_alloc, hint,
                                   area_blk_off, block_list);
}

int64_t BitMapAreaIN::alloc_blocks_dis(int64_t num_blocks, int64_t min_alloc,
                                       int64_t hint, int64_t blk_off, ExtentList *block_list)
{
  int64_t allocated = 0;

  lock_shared();
  allocated += alloc_blocks_dis_int(num_blocks, min_alloc, hint, blk_off, block_list);
  add_used_blocks(allocated);
  unlock();

  return allocated;
}
void BitMapAreaIN::set_blocks_used_int(int64_t start_block, int64_t num_blocks)
{
  BitMapArea *child = NULL;
  int64_t child_block_offset = 0;
  int64_t falling_in_child = 0;
  int64_t blks = num_blocks;
  int64_t start_blk = start_block;

  alloc_assert(start_block >= 0);

  while (blks) {
    child = static_cast<BitMapArea *>(m_child_list.get_nth_item(
      start_blk / m_child_size_blocks));

    child_block_offset = start_blk % child->size();
    falling_in_child = MIN(m_child_size_blocks - child_block_offset,
                           blks);
    child->set_blocks_used(child_block_offset, falling_in_child);
    start_blk += falling_in_child;
    blks -= falling_in_child;
  }

  add_used_blocks(num_blocks);
  alloc_dbg_assert(is_allocated(start_block, num_blocks));
}

void BitMapAreaIN::set_blocks_used(int64_t start_block, int64_t num_blocks)
{
  if (num_blocks == 0) {
    return;
  }

  lock_shared();
  set_blocks_used_int(start_block, num_blocks);
  unlock();
}
void BitMapAreaIN::free_blocks_int(int64_t start_block, int64_t num_blocks)
{
  BitMapArea *child = NULL;
  int64_t child_block_offset = 0;
  int64_t falling_in_child = 0;

  alloc_assert(start_block >= 0 &&
               (start_block + num_blocks) <= size());

  if (num_blocks == 0) {
    return;
  }

  while (num_blocks) {
    child = static_cast<BitMapArea *>(m_child_list.get_nth_item(
      start_block / m_child_size_blocks));

    child_block_offset = start_block % m_child_size_blocks;

    falling_in_child = MIN(m_child_size_blocks - child_block_offset,
                           num_blocks);
    child->free_blocks(child_block_offset, falling_in_child);
    start_block += falling_in_child;
    num_blocks -= falling_in_child;
  }
}

void BitMapAreaIN::free_blocks(int64_t start_block, int64_t num_blocks)
{
  if (num_blocks == 0) {
    return;
  }
  lock_shared();
  alloc_dbg_assert(is_allocated(start_block, num_blocks));

  free_blocks_int(start_block, num_blocks);
  (void) sub_used_blocks(num_blocks);

  unlock();
}
void BitMapAreaIN::dump_state(CephContext* const cct, int& count)
{
  BitMapArea *child = NULL;

  BmapEntityListIter iter = BmapEntityListIter(
    &m_child_list, 0, false);

  while ((child = static_cast<BitMapArea *>(iter.next()))) {
    child->dump_state(cct, count);
  }
}
BitMapAreaLeaf::BitMapAreaLeaf(CephContext* cct, int64_t total_blocks,
                               int64_t area_idx)
  : BitMapAreaIN(cct)
{
  init(cct, total_blocks, area_idx, false);
}

BitMapAreaLeaf::BitMapAreaLeaf(CephContext* cct, int64_t total_blocks,
                               int64_t area_idx, bool def)
  : BitMapAreaIN(cct)
{
  init(cct, total_blocks, area_idx, def);
}

void BitMapAreaLeaf::init(CephContext* const cct,
                          const int64_t total_blocks,
                          const int64_t area_idx,
                          const bool def)
{
  int64_t num_child = 0;
  alloc_assert(!(total_blocks % BmapEntry::size()));

  init_common(cct, total_blocks, area_idx, def);
  alloc_assert(m_level == 1);
  int zone_size_block = get_zone_size(cct);
  alloc_assert(zone_size_block > 0);
  num_child = (total_blocks + zone_size_block - 1) / zone_size_block;
  alloc_assert(num_child);
  m_child_size_blocks = total_blocks / num_child;

  std::vector<BitMapArea*> children;
  children.reserve(num_child);
  for (int i = 0; i < num_child; i++) {
    children.emplace_back(new BitMapZone(cct, m_child_size_blocks, i, def));
  }
  m_child_list = BitMapAreaList(std::move(children));

  BitMapAreaLeaf::incr_count();
}
BitMapAreaLeaf::~BitMapAreaLeaf()
{
  lock_excl();

  for (int64_t i = 0; i < m_child_list.size(); i++) {
    auto child = static_cast<BitMapArea *>(m_child_list.get_nth_item(i));
    delete child;
  }

  unlock();
}

/* Intentionally hinted because of BitMapAreaLeaf::alloc_blocks_dis_int. */
inline bool BitMapAreaLeaf::child_check_n_lock(BitMapZone* const child,
                                               const int64_t required,
                                               const bool lock)
{
  /* The exhausted check can be performed without acquiring the lock. This
   * is because 1) BitMapZone::is_exhausted() actually operates atomically
   * and 2) it's followed by the exclusive, required-aware re-verification. */
  if (child->BitMapZone::is_exhausted()) {
    return false;
  }

  if (lock) {
    child->lock_excl();
  } else if (!child->lock_excl_try()) {
    return false;
  }

  int64_t child_used_blocks = child->get_used_blocks();
  int64_t child_total_blocks = child->size();
  if ((child_total_blocks - child_used_blocks) < required) {
    child->unlock();
    return false;
  }

  return true;
}
int64_t BitMapAreaLeaf::alloc_blocks_dis_int(int64_t num_blocks, int64_t min_alloc,
                                             int64_t hint, int64_t area_blk_off, ExtentList *block_list)
{
  BitMapZone* child = nullptr;
  int64_t allocated = 0;
  int64_t blk_off = 0;

  BmapEntityListIter iter = BmapEntityListIter(
    &m_child_list, hint / m_child_size_blocks, false);

  /* We are sure the only element type we aggregate is BitMapZone, so there
   * is no need to go through the vptr, which would prevent the compiler
   * from inlining the calls. See BitMapAreaLeaf::init. */
  while ((child = static_cast<BitMapZone*>(iter.next()))) {
    if (!child_check_n_lock(child, 1, false)) {
      hint = 0;
      continue;
    }

    blk_off = child->get_index() * m_child_size_blocks + area_blk_off;
    allocated += child->alloc_blocks_dis(num_blocks - allocated, min_alloc,
                                         hint % m_child_size_blocks, blk_off, block_list);
    child->unlock();
    if (allocated == num_blocks) {
      break;
    }
    hint = 0;
  }

  return allocated;
}
void BitMapAreaLeaf::free_blocks_int(int64_t start_block, int64_t num_blocks)
{
  BitMapArea *child = NULL;
  int64_t child_block_offset = 0;
  int64_t falling_in_child = 0;

  alloc_assert(start_block >= 0 &&
               (start_block + num_blocks) <= size());

  if (num_blocks == 0) {
    return;
  }

  while (num_blocks) {
    child = static_cast<BitMapArea *>(m_child_list.get_nth_item(
      start_block / m_child_size_blocks));

    child_block_offset = start_block % m_child_size_blocks;

    falling_in_child = MIN(m_child_size_blocks - child_block_offset,
                           num_blocks);

    child->lock_excl();
    child->free_blocks(child_block_offset, falling_in_child);
    child->unlock();

    start_block += falling_in_child;
    num_blocks -= falling_in_child;
  }
}
/*
 * Main allocator functions.
 */
BitAllocator::BitAllocator(CephContext* cct, int64_t total_blocks,
                           int64_t zone_size_block, bmap_alloc_mode_t mode)
  : BitMapAreaIN(cct),
    cct(cct)
{
  init_check(total_blocks, zone_size_block, mode, false, false);
}

BitAllocator::BitAllocator(CephContext* cct, int64_t total_blocks,
                           int64_t zone_size_block, bmap_alloc_mode_t mode,
                           bool def)
  : BitMapAreaIN(cct),
    cct(cct)
{
  init_check(total_blocks, zone_size_block, mode, def, false);
}

BitAllocator::BitAllocator(CephContext* cct, int64_t total_blocks,
                           int64_t zone_size_block, bmap_alloc_mode_t mode,
                           bool def, bool stats_on)
  : BitMapAreaIN(cct),
    cct(cct)
{
  init_check(total_blocks, zone_size_block, mode, def, stats_on);
}

void BitAllocator::init_check(int64_t total_blocks, int64_t zone_size_block,
                              bmap_alloc_mode_t mode, bool def, bool stats_on)
{
  int64_t unaligned_blocks = 0;

  if (mode != SERIAL && mode != CONCURRENT) {
    ceph_abort();
  }

  if (total_blocks <= 0) {
    ceph_abort();
  }

  if (zone_size_block == 0 ||
      zone_size_block < BmapEntry::size()) {
    ceph_abort();
  }

  zone_size_block = (zone_size_block / BmapEntry::size()) *
                    BmapEntry::size();

  unaligned_blocks = total_blocks % zone_size_block;
  m_extra_blocks = unaligned_blocks ? zone_size_block - unaligned_blocks : 0;
  total_blocks = ROUND_UP_TO(total_blocks, zone_size_block);
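  /*
   * Worked example (illustrative, not part of the original source):
   * zone_size_block == 1024 and total_blocks == 1500 give
   * unaligned_blocks == 476 and m_extra_blocks == 548; total_blocks is
   * rounded up to 2048, and the 548 padding blocks are marked used below
   * so they are never handed out.
   */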

  m_alloc_mode = mode;
  m_is_stats_on = stats_on;
  if (m_is_stats_on) {
    m_stats = new BitAllocatorStats();
  }

  pthread_rwlock_init(&m_rw_lock, NULL);
  init(cct, total_blocks, 0, def);
  if (!def && unaligned_blocks) {
    /*
     * Mark the extra padding blocks as used from the beginning.
     */
    set_blocks_used(total_blocks - m_extra_blocks, m_extra_blocks);
  }
}
void BitAllocator::lock_excl()
{
  pthread_rwlock_wrlock(&m_rw_lock);
}

void BitAllocator::lock_shared()
{
  pthread_rwlock_rdlock(&m_rw_lock);
}

bool BitAllocator::try_lock()
{
  bool get_lock = false;
  if (pthread_rwlock_trywrlock(&m_rw_lock) == 0) {
    get_lock = true;
  }
  return get_lock;
}

void BitAllocator::unlock()
{
  pthread_rwlock_unlock(&m_rw_lock);
}

BitAllocator::~BitAllocator()
{
  lock_excl();

  for (int64_t i = 0; i < m_child_list.size(); i++) {
    auto child = static_cast<BitMapArea *>(m_child_list.get_nth_item(i));
    delete child;
  }

  unlock();
  pthread_rwlock_destroy(&m_rw_lock);
}

void
BitAllocator::shutdown()
{
  bool get_lock = try_lock();
  assert(get_lock);
  bool get_serial_lock = try_serial_lock();
  assert(get_serial_lock);
}
void BitAllocator::unreserve_blocks(int64_t unused)
{
  unreserve(unused, 0);
}

void BitAllocator::serial_lock()
{
  if (m_alloc_mode == SERIAL) {
    m_serial_mutex.lock();
  }
}

void BitAllocator::serial_unlock()
{
  if (m_alloc_mode == SERIAL) {
    m_serial_mutex.unlock();
  }
}

bool BitAllocator::try_serial_lock()
{
  bool get_lock = false;
  if (m_alloc_mode == SERIAL) {
    /* std::mutex::try_lock() returns true on success, so do not compare
     * the result against 0. */
    if (m_serial_mutex.try_lock()) {
      get_lock = true;
    }
  } else {
    get_lock = true;
  }
  return get_lock;
}
bool BitAllocator::child_check_n_lock(BitMapArea *child, int64_t required)
{
  child->lock_shared();

  if (child->is_exhausted()) {
    child->unlock();
    return false;
  }

  int64_t child_used_blocks = child->get_used_blocks();
  int64_t child_total_blocks = child->size();
  if ((child_total_blocks - child_used_blocks) < required) {
    child->unlock();
    return false;
  }

  return true;
}

void BitAllocator::child_unlock(BitMapArea *child)
{
  child->unlock();
}

bool BitAllocator::check_input_dis(int64_t num_blocks)
{
  if (num_blocks == 0 || num_blocks > size()) {
    return false;
  }
  return true;
}

bool BitAllocator::check_input(int64_t num_blocks)
{
  if (num_blocks == 0 || num_blocks > get_zone_size(cct)) {
    return false;
  }
  return true;
}
void BitAllocator::free_blocks(int64_t start_block, int64_t num_blocks)
{
  if (num_blocks == 0) {
    return;
  }

  alloc_assert(start_block + num_blocks <= size());
  if (is_stats_on()) {
    m_stats->add_free_calls(1);
    m_stats->add_freed(num_blocks);
  }

  lock_shared();
  alloc_dbg_assert(is_allocated(start_block, num_blocks));

  free_blocks_int(start_block, num_blocks);
  (void) sub_used_blocks(num_blocks);

  unlock();
}

void BitAllocator::set_blocks_used(int64_t start_block, int64_t num_blocks)
{
  if (num_blocks == 0) {
    return;
  }

  alloc_assert(start_block + num_blocks <= size());

  lock_shared();
  set_blocks_used_int(start_block, num_blocks);
  unlock();
}
/*
 * Allocate N discontiguous blocks.
 */
int64_t BitAllocator::alloc_blocks_dis_int(int64_t num_blocks, int64_t min_alloc,
                                           int64_t hint, int64_t area_blk_off, ExtentList *block_list)
{
  return alloc_blocks_dis_int_work(true, num_blocks, min_alloc, hint,
                                   area_blk_off, block_list);
}

int64_t BitAllocator::alloc_blocks_dis_res(int64_t num_blocks, int64_t min_alloc,
                                           int64_t hint, ExtentList *block_list)
{
  return alloc_blocks_dis_work(num_blocks, min_alloc, hint, block_list, true);
}

int64_t BitAllocator::alloc_blocks_dis_work(int64_t num_blocks, int64_t min_alloc,
                                            int64_t hint, ExtentList *block_list, bool reserved)
{
  int scans = 1;
  int64_t allocated = 0;
  /*
   * This is the root, so the offset is still 0.
   */
  int64_t blk_off = 0;

  if (!check_input_dis(num_blocks)) {
    return 0;
  }

  if (is_stats_on()) {
    m_stats->add_alloc_calls(1);
    m_stats->add_allocated(num_blocks);
  }

  lock_shared();
  serial_lock();
  if (!reserved && !reserve_blocks(num_blocks)) {
    goto exit;
  }

  if (is_stats_on()) {
    m_stats->add_concurrent_scans(scans);
  }

  while (scans && allocated < num_blocks) {
    allocated += alloc_blocks_dis_int(num_blocks - allocated, min_alloc, hint + allocated, blk_off, block_list);
    scans--;
  }

  if (allocated < num_blocks) {
    /*
     * Could not find everything in the concurrent scan.
     * Go in a serial manner to get something for sure,
     * if available.
     */
    serial_unlock();
    unlock();
    lock_excl();
    serial_lock();
    allocated += alloc_blocks_dis_int(num_blocks - allocated, min_alloc, hint + allocated,
                                      blk_off, block_list);
    if (is_stats_on()) {
      m_stats->add_serial_scans(1);
    }
  }

  unreserve(num_blocks, allocated);
  alloc_dbg_assert(is_allocated_dis(block_list, allocated));

exit:
  serial_unlock();
  unlock();

  return allocated;
}
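/*
 * Usage sketch (illustrative, not part of the original source; ExtentList
 * and AllocExtentVector come from bluestore_types.h, and the ExtentList
 * constructor arguments are an assumption): reserve, allocate in
 * min_alloc chunks of 8, and later free through the same extent list:
 *
 *   AllocExtentVector extents;
 *   ExtentList blocks(&extents, 1);
 *   if (alloc.reserve_blocks(256)) {
 *     int64_t got = alloc.alloc_blocks_dis_res(256, 8, 0, &blocks);
 *     // the reservation is settled inside alloc_blocks_dis_work();
 *     // later, return the blocks:
 *     alloc.free_blocks_dis(got, &blocks);
 *   }
 */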
bool BitAllocator::is_allocated_dis(ExtentList *blocks, int64_t num_blocks)
{
  int64_t count = 0;
  for (int64_t j = 0; j < blocks->get_extent_count(); j++) {
    auto p = blocks->get_nth_extent(j);
    count += p.second;
    if (!is_allocated(p.first, p.second)) {
      return false;
    }
  }

  alloc_assert(count == num_blocks);
  return true;
}

void BitAllocator::free_blocks_dis(int64_t num_blocks, ExtentList *block_list)
{
  int64_t freed = 0;

  lock_shared();
  if (is_stats_on()) {
    m_stats->add_free_calls(1);
    m_stats->add_freed(num_blocks);
  }

  for (int64_t i = 0; i < block_list->get_extent_count(); i++) {
    free_blocks_int(block_list->get_nth_extent(i).first,
                    block_list->get_nth_extent(i).second);
    freed += block_list->get_nth_extent(i).second;
  }

  alloc_assert(num_blocks == freed);
  sub_used_blocks(num_blocks);
  alloc_assert(get_used_blocks() >= 0);
  unlock();
}
void BitAllocator::dump()
{
  int count = 0;
  dump_state(cct, count);
}