X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?p=kvmfornfv.git;a=blobdiff_plain;f=kernel%2Ffs%2Fbtrfs%2Fdisk-io.c;h=41fb43183406c944086ca6eefaf10c7f5a35880e;hp=2ef9a4b72d06e46d4f3a19c73e94796cb8397347;hb=e09b41010ba33a20a87472ee821fa407a5b8da36;hpb=f93b97fd65072de626c074dbe099a1fff05ce060 diff --git a/kernel/fs/btrfs/disk-io.c b/kernel/fs/btrfs/disk-io.c index 2ef9a4b72..41fb43183 100644 --- a/kernel/fs/btrfs/disk-io.c +++ b/kernel/fs/btrfs/disk-io.c @@ -319,9 +319,9 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, memcpy(&found, result, csum_size); read_extent_buffer(buf, &val, 0, csum_size); - printk_ratelimited(KERN_WARNING - "BTRFS: %s checksum verify failed on %llu wanted %X found %X " - "level %d\n", + btrfs_warn_rl(fs_info, + "%s checksum verify failed on %llu wanted %X found %X " + "level %d", fs_info->sb->s_id, buf->start, val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) @@ -368,9 +368,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, ret = 0; goto out; } - printk_ratelimited(KERN_ERR - "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n", - eb->fs_info->sb->s_id, eb->start, + btrfs_err_rl(eb->fs_info, + "parent transid verify failed on %llu wanted %llu found %llu", + eb->start, parent_transid, btrfs_header_generation(eb)); ret = 1; @@ -629,15 +629,14 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { - printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start " - "%llu %llu\n", - eb->fs_info->sb->s_id, found_start, eb->start); + btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu", + found_start, eb->start); ret = -EIO; goto err; } if (check_tree_block_fsid(root->fs_info, eb)) { - printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n", - eb->fs_info->sb->s_id, eb->start); + btrfs_err_rl(eb->fs_info, "bad fsid on block %llu", + eb->start); ret = -EIO; goto err; } @@ -703,7 +702,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) return -EIO; /* we fixed nothing */ } -static void end_workqueue_bio(struct bio *bio, int err) +static void end_workqueue_bio(struct bio *bio) { struct btrfs_end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; @@ -711,7 +710,7 @@ static void end_workqueue_bio(struct bio *bio, int err) btrfs_work_func_t func; fs_info = end_io_wq->info; - end_io_wq->error = err; + end_io_wq->error = bio->bi_error; if (bio->bi_rw & REQ_WRITE) { if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { @@ -802,13 +801,17 @@ static void run_one_async_done(struct btrfs_work *work) limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; + /* + * atomic_dec_return implies a barrier for waitqueue_active + */ if (atomic_dec_return(&fs_info->nr_async_submits) < limit && waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); /* If an error occured we just want to clean up the bio and move on */ if (async->error) { - bio_endio(async->bio, async->error); + async->bio->bi_error = async->error; + bio_endio(async->bio); return; } @@ -908,8 +911,10 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, * submission context. Just jump into btrfs_map_bio */ ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); - if (ret) - bio_endio(bio, ret); + if (ret) { + bio->bi_error = ret; + bio_endio(bio); + } return ret; } @@ -960,10 +965,13 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, __btree_submit_bio_done); } - if (ret) { + if (ret) + goto out_w_error; + return 0; + out_w_error: - bio_endio(bio, ret); - } + bio->bi_error = ret; + bio_endio(bio); return ret; } @@ -1149,12 +1157,12 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, buf = btrfs_find_create_tree_block(root, bytenr); if (!buf) - return NULL; + return ERR_PTR(-ENOMEM); ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret) { free_extent_buffer(buf); - return NULL; + return ERR_PTR(ret); } return buf; @@ -1259,6 +1267,7 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, atomic_set(&root->orphan_inodes, 0); atomic_set(&root->refs, 1); atomic_set(&root->will_be_snapshoted, 0); + atomic_set(&root->qgroup_meta_rsv, 0); root->log_transid = 0; root->log_transid_committed = -1; root->last_log_commit = 0; @@ -1509,20 +1518,19 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, generation = btrfs_root_generation(&root->root_item); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), generation); - if (!root->node) { - ret = -ENOMEM; + if (IS_ERR(root->node)) { + ret = PTR_ERR(root->node); goto find_fail; } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) { ret = -EIO; - goto read_fail; + free_extent_buffer(root->node); + goto find_fail; } root->commit_root = btrfs_root_node(root); out: btrfs_free_path(path); return root; -read_fail: - free_extent_buffer(root->node); find_fail: kfree(root); alloc_fail: @@ -1574,8 +1582,23 @@ int btrfs_init_fs_root(struct btrfs_root *root) ret = get_anon_bdev(&root->anon_dev); if (ret) goto free_writers; + + mutex_lock(&root->objectid_mutex); + ret = btrfs_find_highest_objectid(root, + &root->highest_objectid); + if (ret) { + mutex_unlock(&root->objectid_mutex); + goto free_root_dev; + } + + ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID); + + mutex_unlock(&root->objectid_mutex); + return 0; +free_root_dev: + free_anon_bdev(root->anon_dev); free_writers: btrfs_free_subvolume_writers(root->subv_writers); fail: @@ -1725,6 +1748,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE; bdi->congested_fn = btrfs_congested_fn; bdi->congested_data = info; + bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; return 0; } @@ -1736,22 +1760,22 @@ static void end_workqueue_fn(struct btrfs_work *work) { struct bio *bio; struct btrfs_end_io_wq *end_io_wq; - int error; end_io_wq = container_of(work, struct btrfs_end_io_wq, work); bio = end_io_wq->bio; - error = end_io_wq->error; + bio->bi_error = end_io_wq->error; bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); - bio_endio_nodec(bio, error); + bio_endio(bio); } static int cleaner_kthread(void *arg) { struct btrfs_root *root = arg; int again; + struct btrfs_trans_handle *trans; do { again = 0; @@ -1772,8 +1796,10 @@ static int cleaner_kthread(void *arg) goto sleep; } + mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex); btrfs_run_delayed_iputs(root); - btrfs_delete_unused_bgs(root->fs_info); + mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex); + again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -1782,6 +1808,16 @@ static int cleaner_kthread(void *arg) * needn't do anything special here. */ btrfs_run_defrag_inodes(root->fs_info); + + /* + * Acquires fs_info->delete_unused_bgs_mutex to avoid racing + * with relocation (btrfs_relocate_chunk) and relocation + * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group) + * after acquiring fs_info->delete_unused_bgs_mutex. So we + * can't hold, nor need to, fs_info->cleaner_mutex when deleting + * unused block groups. + */ + btrfs_delete_unused_bgs(root->fs_info); sleep: if (!try_to_freeze() && !again) { set_current_state(TASK_INTERRUPTIBLE); @@ -1790,6 +1826,34 @@ sleep: __set_current_state(TASK_RUNNING); } } while (!kthread_should_stop()); + + /* + * Transaction kthread is stopped before us and wakes us up. + * However we might have started a new transaction and COWed some + * tree blocks when deleting unused block groups for example. So + * make sure we commit the transaction we started to have a clean + * shutdown when evicting the btree inode - if it has dirty pages + * when we do the final iput() on it, eviction will trigger a + * writeback for it which will fail with null pointer dereferences + * since work queues and other resources were already released and + * destroyed by the time the iput/eviction/writeback is made. + */ + trans = btrfs_attach_transaction(root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT) + btrfs_err(root->fs_info, + "cleaner transaction attach returned %ld", + PTR_ERR(trans)); + } else { + int ret; + + ret = btrfs_commit_transaction(trans, root); + if (ret) + btrfs_err(root->fs_info, + "cleaner open transaction commit returned %d", + ret); + } + return 0; } @@ -2305,8 +2369,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { - printk(KERN_WARNING "BTRFS: log replay required " - "on RO media\n"); + btrfs_warn(fs_info, "log replay required on RO media"); return -EIO; } @@ -2320,9 +2383,13 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, log_tree_root->node = read_tree_block(tree_root, bytenr, fs_info->generation + 1); - if (!log_tree_root->node || - !extent_buffer_uptodate(log_tree_root->node)) { - printk(KERN_ERR "BTRFS: failed to read log tree\n"); + if (IS_ERR(log_tree_root->node)) { + btrfs_warn(fs_info, "failed to read log tree"); + ret = PTR_ERR(log_tree_root->node); + kfree(log_tree_root); + return ret; + } else if (!extent_buffer_uptodate(log_tree_root->node)) { + btrfs_err(fs_info, "failed to read log tree"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); return -EIO; @@ -2330,7 +2397,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, /* returns with log_tree_root freed on success */ ret = btrfs_recover_log_trees(log_tree_root); if (ret) { - btrfs_error(tree_root->fs_info, ret, + btrfs_std_error(tree_root->fs_info, ret, "Failed to recover log tree"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); @@ -2489,12 +2556,12 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->unused_bgs_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); + mutex_init(&fs_info->delete_unused_bgs_mutex); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); + mutex_init(&fs_info->cleaner_delayed_iput_mutex); seqlock_init(&fs_info->profiles_lock); - init_rwsem(&fs_info->delayed_iput_sem); - init_completion(&fs_info->kobj_unregister); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); @@ -2525,7 +2592,7 @@ int open_ctree(struct super_block *sb, fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ /* readahead state */ - INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); spin_lock_init(&fs_info->reada_lock); fs_info->thread_pool_size = min_t(unsigned long, @@ -2567,7 +2634,6 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->ordered_operations_mutex); - mutex_init(&fs_info->ordered_extent_flush_mutex); mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); @@ -2607,8 +2673,8 @@ int open_ctree(struct super_block *sb, * Read super block and check the signature bytes only */ bh = btrfs_read_dev_super(fs_devices->latest_bdev); - if (!bh) { - err = -EINVAL; + if (IS_ERR(bh)) { + err = PTR_ERR(bh); goto fail_alloc; } @@ -2619,6 +2685,7 @@ int open_ctree(struct super_block *sb, if (btrfs_check_super_csum(bh->b_data)) { printk(KERN_ERR "BTRFS: superblock checksum mismatch\n"); err = -EINVAL; + brelse(bh); goto fail_alloc; } @@ -2797,10 +2864,13 @@ int open_ctree(struct super_block *sb, chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), generation); - if (!chunk_root->node || - !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { + if (IS_ERR(chunk_root->node) || + !extent_buffer_uptodate(chunk_root->node)) { printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n", sb->s_id); + if (!IS_ERR(chunk_root->node)) + free_extent_buffer(chunk_root->node); + chunk_root->node = NULL; goto fail_tree_roots; } btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); @@ -2834,11 +2904,13 @@ retry_root_backup: tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), generation); - if (!tree_root->node || - !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { + if (IS_ERR(tree_root->node) || + !extent_buffer_uptodate(tree_root->node)) { printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", sb->s_id); - + if (!IS_ERR(tree_root->node)) + free_extent_buffer(tree_root->node); + tree_root->node = NULL; goto recovery_tree_root; } @@ -2846,6 +2918,18 @@ retry_root_backup: tree_root->commit_root = btrfs_root_node(tree_root); btrfs_set_root_refs(&tree_root->root_item, 1); + mutex_lock(&tree_root->objectid_mutex); + ret = btrfs_find_highest_objectid(tree_root, + &tree_root->highest_objectid); + if (ret) { + mutex_unlock(&tree_root->objectid_mutex); + goto recovery_tree_root; + } + + ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID); + + mutex_unlock(&tree_root->objectid_mutex); + ret = btrfs_read_roots(fs_info, tree_root); if (ret) goto recovery_tree_root; @@ -2874,12 +2958,24 @@ retry_root_backup: btrfs_close_extra_devices(fs_devices, 1); - ret = btrfs_sysfs_add_one(fs_info); + ret = btrfs_sysfs_add_fsid(fs_devices, NULL); if (ret) { - pr_err("BTRFS: failed to init sysfs interface: %d\n", ret); + pr_err("BTRFS: failed to init sysfs fsid interface: %d\n", ret); goto fail_block_groups; } + ret = btrfs_sysfs_add_device(fs_devices); + if (ret) { + pr_err("BTRFS: failed to init sysfs device interface: %d\n", ret); + goto fail_fsdev_sysfs; + } + + ret = btrfs_sysfs_add_mounted(fs_info); + if (ret) { + pr_err("BTRFS: failed to init sysfs interface: %d\n", ret); + goto fail_fsdev_sysfs; + } + ret = btrfs_init_space_info(fs_info); if (ret) { printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret); @@ -2896,8 +2992,9 @@ retry_root_backup: if (fs_info->fs_devices->missing_devices > fs_info->num_tolerated_disk_barrier_failures && !(sb->s_flags & MS_RDONLY)) { - printk(KERN_WARNING "BTRFS: " - "too many missing devices, writeable mount is not allowed\n"); + pr_warn("BTRFS: missing devices(%llu) exceeds the limit(%d), writeable mount is not allowed\n", + fs_info->fs_devices->missing_devices, + fs_info->num_tolerated_disk_barrier_failures); goto fail_sysfs; } @@ -3053,7 +3150,10 @@ fail_cleaner: filemap_write_and_wait(fs_info->btree_inode->i_mapping); fail_sysfs: - btrfs_sysfs_remove_one(fs_info); + btrfs_sysfs_remove_mounted(fs_info); + +fail_fsdev_sysfs: + btrfs_sysfs_remove_fsid(fs_info->fs_devices); fail_block_groups: btrfs_put_block_group_cache(fs_info); @@ -3112,8 +3212,8 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) struct btrfs_device *device = (struct btrfs_device *) bh->b_private; - printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write due to " - "I/O error on %s\n", + btrfs_warn_rl_in_rcu(device->dev_root->fs_info, + "lost page write due to IO error on %s", rcu_str_deref(device->name)); /* note, we dont' set_buffer_write_io_error because we have * our own ways of dealing with the IO errors @@ -3125,6 +3225,37 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) put_bh(bh); } +int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num, + struct buffer_head **bh_ret) +{ + struct buffer_head *bh; + struct btrfs_super_block *super; + u64 bytenr; + + bytenr = btrfs_sb_offset(copy_num); + if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode)) + return -EINVAL; + + bh = __bread(bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE); + /* + * If we fail to read from the underlying devices, as of now + * the best option we have is to mark it EIO. + */ + if (!bh) + return -EIO; + + super = (struct btrfs_super_block *)bh->b_data; + if (btrfs_super_bytenr(super) != bytenr || + btrfs_super_magic(super) != BTRFS_MAGIC) { + brelse(bh); + return -EINVAL; + } + + *bh_ret = bh; + return 0; +} + + struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) { struct buffer_head *bh; @@ -3132,7 +3263,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) struct btrfs_super_block *super; int i; u64 transid = 0; - u64 bytenr; + int ret = -EINVAL; /* we would like to check all the supers, but that would make * a btrfs mount succeed after a mkfs from a different FS. @@ -3140,21 +3271,11 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) * later supers, using BTRFS_SUPER_MIRROR_MAX instead */ for (i = 0; i < 1; i++) { - bytenr = btrfs_sb_offset(i); - if (bytenr + BTRFS_SUPER_INFO_SIZE >= - i_size_read(bdev->bd_inode)) - break; - bh = __bread(bdev, bytenr / 4096, - BTRFS_SUPER_INFO_SIZE); - if (!bh) + ret = btrfs_read_dev_one_super(bdev, i, &bh); + if (ret) continue; super = (struct btrfs_super_block *)bh->b_data; - if (btrfs_super_bytenr(super) != bytenr || - btrfs_super_magic(super) != BTRFS_MAGIC) { - brelse(bh); - continue; - } if (!latest || btrfs_super_generation(super) > transid) { brelse(latest); @@ -3164,6 +3285,10 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) brelse(bh); } } + + if (!latest) + return ERR_PTR(ret); + return latest; } @@ -3232,8 +3357,9 @@ static int write_dev_supers(struct btrfs_device *device, bh = __getblk(device->bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE); if (!bh) { - printk(KERN_ERR "BTRFS: couldn't get super " - "buffer head for bytenr %Lu\n", bytenr); + btrfs_err(device->dev_root->fs_info, + "couldn't get super buffer head for bytenr %llu", + bytenr); errors++; continue; } @@ -3267,13 +3393,8 @@ static int write_dev_supers(struct btrfs_device *device, * endio for the write_dev_flush, this will wake anyone waiting * for the barrier when it is done */ -static void btrfs_end_empty_barrier(struct bio *bio, int err) +static void btrfs_end_empty_barrier(struct bio *bio) { - if (err) { - if (err == -EOPNOTSUPP) - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - clear_bit(BIO_UPTODATE, &bio->bi_flags); - } if (bio->bi_private) complete(bio->bi_private); bio_put(bio); @@ -3301,12 +3422,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait) wait_for_completion(&device->flush_wait); - if (bio_flagged(bio, BIO_EOPNOTSUPP)) { - printk_in_rcu("BTRFS: disabling barriers on dev %s\n", - rcu_str_deref(device->name)); - device->nobarriers = 1; - } else if (!bio_flagged(bio, BIO_UPTODATE)) { - ret = -EIO; + if (bio->bi_error) { + ret = bio->bi_error; btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS); } @@ -3389,6 +3506,35 @@ static int barrier_all_devices(struct btrfs_fs_info *info) return 0; } +int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) +{ + int raid_type; + int min_tolerated = INT_MAX; + + if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 || + (flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE)) + min_tolerated = min(min_tolerated, + btrfs_raid_array[BTRFS_RAID_SINGLE]. + tolerated_failures); + + for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) { + if (raid_type == BTRFS_RAID_SINGLE) + continue; + if (!(flags & btrfs_raid_group[raid_type])) + continue; + min_tolerated = min(min_tolerated, + btrfs_raid_array[raid_type]. + tolerated_failures); + } + + if (min_tolerated == INT_MAX) { + pr_warn("BTRFS: unknown raid flag: %llu\n", flags); + min_tolerated = 0; + } + + return min_tolerated; +} + int btrfs_calc_num_tolerated_disk_barrier_failures( struct btrfs_fs_info *fs_info) { @@ -3398,13 +3544,12 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_METADATA, BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; - int num_types = 4; int i; int c; int num_tolerated_disk_barrier_failures = (int)fs_info->fs_devices->num_devices; - for (i = 0; i < num_types; i++) { + for (i = 0; i < ARRAY_SIZE(types); i++) { struct btrfs_space_info *tmp; sinfo = NULL; @@ -3422,44 +3567,21 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( down_read(&sinfo->groups_sem); for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { - if (!list_empty(&sinfo->block_groups[c])) { - u64 flags; - - btrfs_get_block_group_info( - &sinfo->block_groups[c], &space); - if (space.total_bytes == 0 || - space.used_bytes == 0) - continue; - flags = space.flags; - /* - * return - * 0: if dup, single or RAID0 is configured for - * any of metadata, system or data, else - * 1: if RAID5 is configured, or if RAID1 or - * RAID10 is configured and only two mirrors - * are used, else - * 2: if RAID6 is configured, else - * num_mirrors - 1: if RAID1 or RAID10 is - * configured and more than - * 2 mirrors are used. - */ - if (num_tolerated_disk_barrier_failures > 0 && - ((flags & (BTRFS_BLOCK_GROUP_DUP | - BTRFS_BLOCK_GROUP_RAID0)) || - ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) - == 0))) - num_tolerated_disk_barrier_failures = 0; - else if (num_tolerated_disk_barrier_failures > 1) { - if (flags & (BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID10)) { - num_tolerated_disk_barrier_failures = 1; - } else if (flags & - BTRFS_BLOCK_GROUP_RAID6) { - num_tolerated_disk_barrier_failures = 2; - } - } - } + u64 flags; + + if (list_empty(&sinfo->block_groups[c])) + continue; + + btrfs_get_block_group_info(&sinfo->block_groups[c], + &space); + if (space.total_bytes == 0 || space.used_bytes == 0) + continue; + flags = space.flags; + + num_tolerated_disk_barrier_failures = min( + num_tolerated_disk_barrier_failures, + btrfs_get_num_tolerated_disk_barrier_failures( + flags)); } up_read(&sinfo->groups_sem); } @@ -3494,7 +3616,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) if (ret) { mutex_unlock( &root->fs_info->fs_devices->device_list_mutex); - btrfs_error(root->fs_info, ret, + btrfs_std_error(root->fs_info, ret, "errors while submitting device barriers."); return ret; } @@ -3534,7 +3656,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); /* FUA is masked off if unsupported and can't be the reason */ - btrfs_error(root->fs_info, -EIO, + btrfs_std_error(root->fs_info, -EIO, "%d errors while writing supers", total_errors); return -EIO; } @@ -3552,7 +3674,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) } mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (total_errors > max_errors) { - btrfs_error(root->fs_info, -EIO, + btrfs_std_error(root->fs_info, -EIO, "%d errors while writing supers", total_errors); return -EIO; } @@ -3688,6 +3810,9 @@ void close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); + /* wait for the qgroup rescan worker to stop */ + btrfs_qgroup_wait_for_completion(fs_info); + /* wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); /* avoid complains from lockdep et al., set sem back to initial state */ @@ -3710,6 +3835,13 @@ void close_ctree(struct btrfs_root *root) cancel_work_sync(&fs_info->async_reclaim_work); if (!(fs_info->sb->s_flags & MS_RDONLY)) { + /* + * If the cleaner thread is stopped and there are + * block groups queued for removal, the deletion will be + * skipped when we quit the cleaner thread. + */ + btrfs_delete_unused_bgs(root->fs_info); + ret = btrfs_commit_super(root); if (ret) btrfs_err(fs_info, "commit super ret %d", ret); @@ -3731,7 +3863,8 @@ void close_ctree(struct btrfs_root *root) percpu_counter_sum(&fs_info->delalloc_bytes)); } - btrfs_sysfs_remove_one(fs_info); + btrfs_sysfs_remove_mounted(fs_info); + btrfs_sysfs_remove_fsid(fs_info->fs_devices); btrfs_free_fs_roots(fs_info); @@ -4060,6 +4193,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, while ((node = rb_first(&delayed_refs->href_root)) != NULL) { struct btrfs_delayed_ref_head *head; + struct btrfs_delayed_ref_node *tmp; bool pin_bytes = false; head = rb_entry(node, struct btrfs_delayed_ref_head, @@ -4075,11 +4209,10 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, continue; } spin_lock(&head->lock); - while ((node = rb_first(&head->ref_root)) != NULL) { - ref = rb_entry(node, struct btrfs_delayed_ref_node, - rb_node); + list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list, + list) { ref->in_tree = 0; - rb_erase(&ref->rb_node, &head->ref_root); + list_del(&ref->list); atomic_dec(&delayed_refs->num_entries); btrfs_put_delayed_ref(ref); } @@ -4228,25 +4361,6 @@ again: return 0; } -static void btrfs_free_pending_ordered(struct btrfs_transaction *cur_trans, - struct btrfs_fs_info *fs_info) -{ - struct btrfs_ordered_extent *ordered; - - spin_lock(&fs_info->trans_lock); - while (!list_empty(&cur_trans->pending_ordered)) { - ordered = list_first_entry(&cur_trans->pending_ordered, - struct btrfs_ordered_extent, - trans_list); - list_del_init(&ordered->trans_list); - spin_unlock(&fs_info->trans_lock); - - btrfs_put_ordered_extent(ordered); - spin_lock(&fs_info->trans_lock); - } - spin_unlock(&fs_info->trans_lock); -} - void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_root *root) { @@ -4258,7 +4372,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, cur_trans->state = TRANS_STATE_UNBLOCKED; wake_up(&root->fs_info->transaction_wait); - btrfs_free_pending_ordered(cur_trans, root->fs_info); btrfs_destroy_delayed_inodes(root); btrfs_assert_delayed_root_empty(root);