These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / fs / btrfs / disk-io.c
index 2ef9a4b..41fb431 100644 (file)
@@ -319,9 +319,9 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
                        memcpy(&found, result, csum_size);
 
                        read_extent_buffer(buf, &val, 0, csum_size);
-                       printk_ratelimited(KERN_WARNING
-                               "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
-                               "level %d\n",
+                       btrfs_warn_rl(fs_info,
+                               "%s checksum verify failed on %llu wanted %X found %X "
+                               "level %d",
                                fs_info->sb->s_id, buf->start,
                                val, found, btrfs_header_level(buf));
                        if (result != (char *)&inline_result)
@@ -368,9 +368,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
                ret = 0;
                goto out;
        }
-       printk_ratelimited(KERN_ERR
-           "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
-                       eb->fs_info->sb->s_id, eb->start,
+       btrfs_err_rl(eb->fs_info,
+               "parent transid verify failed on %llu wanted %llu found %llu",
+                       eb->start,
                        parent_transid, btrfs_header_generation(eb));
        ret = 1;
 
@@ -629,15 +629,14 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 
        found_start = btrfs_header_bytenr(eb);
        if (found_start != eb->start) {
-               printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start "
-                              "%llu %llu\n",
-                              eb->fs_info->sb->s_id, found_start, eb->start);
+               btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu",
+                              found_start, eb->start);
                ret = -EIO;
                goto err;
        }
        if (check_tree_block_fsid(root->fs_info, eb)) {
-               printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
-                              eb->fs_info->sb->s_id, eb->start);
+               btrfs_err_rl(eb->fs_info, "bad fsid on block %llu",
+                              eb->start);
                ret = -EIO;
                goto err;
        }
@@ -703,7 +702,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
        return -EIO;    /* we fixed nothing */
 }
 
-static void end_workqueue_bio(struct bio *bio, int err)
+static void end_workqueue_bio(struct bio *bio)
 {
        struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
        struct btrfs_fs_info *fs_info;
@@ -711,7 +710,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
        btrfs_work_func_t func;
 
        fs_info = end_io_wq->info;
-       end_io_wq->error = err;
+       end_io_wq->error = bio->bi_error;
 
        if (bio->bi_rw & REQ_WRITE) {
                if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@ -802,13 +801,17 @@ static void run_one_async_done(struct btrfs_work *work)
        limit = btrfs_async_submit_limit(fs_info);
        limit = limit * 2 / 3;
 
+       /*
+        * atomic_dec_return implies a barrier for waitqueue_active
+        */
        if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
            waitqueue_active(&fs_info->async_submit_wait))
                wake_up(&fs_info->async_submit_wait);
 
        /* If an error occured we just want to clean up the bio and move on */
        if (async->error) {
-               bio_endio(async->bio, async->error);
+               async->bio->bi_error = async->error;
+               bio_endio(async->bio);
                return;
        }
 
@@ -908,8 +911,10 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
         * submission context.  Just jump into btrfs_map_bio
         */
        ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
-       if (ret)
-               bio_endio(bio, ret);
+       if (ret) {
+               bio->bi_error = ret;
+               bio_endio(bio);
+       }
        return ret;
 }
 
@@ -960,10 +965,13 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                                          __btree_submit_bio_done);
        }
 
-       if (ret) {
+       if (ret)
+               goto out_w_error;
+       return 0;
+
 out_w_error:
-               bio_endio(bio, ret);
-       }
+       bio->bi_error = ret;
+       bio_endio(bio);
        return ret;
 }
 
@@ -1149,12 +1157,12 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 
        buf = btrfs_find_create_tree_block(root, bytenr);
        if (!buf)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
        if (ret) {
                free_extent_buffer(buf);
-               return NULL;
+               return ERR_PTR(ret);
        }
        return buf;
 
@@ -1259,6 +1267,7 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
        atomic_set(&root->orphan_inodes, 0);
        atomic_set(&root->refs, 1);
        atomic_set(&root->will_be_snapshoted, 0);
+       atomic_set(&root->qgroup_meta_rsv, 0);
        root->log_transid = 0;
        root->log_transid_committed = -1;
        root->last_log_commit = 0;
@@ -1509,20 +1518,19 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
        generation = btrfs_root_generation(&root->root_item);
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                     generation);
-       if (!root->node) {
-               ret = -ENOMEM;
+       if (IS_ERR(root->node)) {
+               ret = PTR_ERR(root->node);
                goto find_fail;
        } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
                ret = -EIO;
-               goto read_fail;
+               free_extent_buffer(root->node);
+               goto find_fail;
        }
        root->commit_root = btrfs_root_node(root);
 out:
        btrfs_free_path(path);
        return root;
 
-read_fail:
-       free_extent_buffer(root->node);
 find_fail:
        kfree(root);
 alloc_fail:
@@ -1574,8 +1582,23 @@ int btrfs_init_fs_root(struct btrfs_root *root)
        ret = get_anon_bdev(&root->anon_dev);
        if (ret)
                goto free_writers;
+
+       mutex_lock(&root->objectid_mutex);
+       ret = btrfs_find_highest_objectid(root,
+                                       &root->highest_objectid);
+       if (ret) {
+               mutex_unlock(&root->objectid_mutex);
+               goto free_root_dev;
+       }
+
+       ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+       mutex_unlock(&root->objectid_mutex);
+
        return 0;
 
+free_root_dev:
+       free_anon_bdev(root->anon_dev);
 free_writers:
        btrfs_free_subvolume_writers(root->subv_writers);
 fail:
@@ -1725,6 +1748,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
        bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
        bdi->congested_fn       = btrfs_congested_fn;
        bdi->congested_data     = info;
+       bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
        return 0;
 }
 
@@ -1736,22 +1760,22 @@ static void end_workqueue_fn(struct btrfs_work *work)
 {
        struct bio *bio;
        struct btrfs_end_io_wq *end_io_wq;
-       int error;
 
        end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
        bio = end_io_wq->bio;
 
-       error = end_io_wq->error;
+       bio->bi_error = end_io_wq->error;
        bio->bi_private = end_io_wq->private;
        bio->bi_end_io = end_io_wq->end_io;
        kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
-       bio_endio_nodec(bio, error);
+       bio_endio(bio);
 }
 
 static int cleaner_kthread(void *arg)
 {
        struct btrfs_root *root = arg;
        int again;
+       struct btrfs_trans_handle *trans;
 
        do {
                again = 0;
@@ -1772,8 +1796,10 @@ static int cleaner_kthread(void *arg)
                        goto sleep;
                }
 
+               mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
                btrfs_run_delayed_iputs(root);
-               btrfs_delete_unused_bgs(root->fs_info);
+               mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
+
                again = btrfs_clean_one_deleted_snapshot(root);
                mutex_unlock(&root->fs_info->cleaner_mutex);
 
@@ -1782,6 +1808,16 @@ static int cleaner_kthread(void *arg)
                 * needn't do anything special here.
                 */
                btrfs_run_defrag_inodes(root->fs_info);
+
+               /*
+                * Acquires fs_info->delete_unused_bgs_mutex to avoid racing
+                * with relocation (btrfs_relocate_chunk) and relocation
+                * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group)
+                * after acquiring fs_info->delete_unused_bgs_mutex. So we
+                * can't hold, nor need to, fs_info->cleaner_mutex when deleting
+                * unused block groups.
+                */
+               btrfs_delete_unused_bgs(root->fs_info);
 sleep:
                if (!try_to_freeze() && !again) {
                        set_current_state(TASK_INTERRUPTIBLE);
@@ -1790,6 +1826,34 @@ sleep:
                        __set_current_state(TASK_RUNNING);
                }
        } while (!kthread_should_stop());
+
+       /*
+        * Transaction kthread is stopped before us and wakes us up.
+        * However we might have started a new transaction and COWed some
+        * tree blocks when deleting unused block groups for example. So
+        * make sure we commit the transaction we started to have a clean
+        * shutdown when evicting the btree inode - if it has dirty pages
+        * when we do the final iput() on it, eviction will trigger a
+        * writeback for it which will fail with null pointer dereferences
+        * since work queues and other resources were already released and
+        * destroyed by the time the iput/eviction/writeback is made.
+        */
+       trans = btrfs_attach_transaction(root);
+       if (IS_ERR(trans)) {
+               if (PTR_ERR(trans) != -ENOENT)
+                       btrfs_err(root->fs_info,
+                                 "cleaner transaction attach returned %ld",
+                                 PTR_ERR(trans));
+       } else {
+               int ret;
+
+               ret = btrfs_commit_transaction(trans, root);
+               if (ret)
+                       btrfs_err(root->fs_info,
+                                 "cleaner open transaction commit returned %d",
+                                 ret);
+       }
+
        return 0;
 }
 
@@ -2305,8 +2369,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
        u64 bytenr = btrfs_super_log_root(disk_super);
 
        if (fs_devices->rw_devices == 0) {
-               printk(KERN_WARNING "BTRFS: log replay required "
-                      "on RO media\n");
+               btrfs_warn(fs_info, "log replay required on RO media");
                return -EIO;
        }
 
@@ -2320,9 +2383,13 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
 
        log_tree_root->node = read_tree_block(tree_root, bytenr,
                        fs_info->generation + 1);
-       if (!log_tree_root->node ||
-           !extent_buffer_uptodate(log_tree_root->node)) {
-               printk(KERN_ERR "BTRFS: failed to read log tree\n");
+       if (IS_ERR(log_tree_root->node)) {
+               btrfs_warn(fs_info, "failed to read log tree");
+               ret = PTR_ERR(log_tree_root->node);
+               kfree(log_tree_root);
+               return ret;
+       } else if (!extent_buffer_uptodate(log_tree_root->node)) {
+               btrfs_err(fs_info, "failed to read log tree");
                free_extent_buffer(log_tree_root->node);
                kfree(log_tree_root);
                return -EIO;
@@ -2330,7 +2397,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
        /* returns with log_tree_root freed on success */
        ret = btrfs_recover_log_trees(log_tree_root);
        if (ret) {
-               btrfs_error(tree_root->fs_info, ret,
+               btrfs_std_error(tree_root->fs_info, ret,
                            "Failed to recover log tree");
                free_extent_buffer(log_tree_root->node);
                kfree(log_tree_root);
@@ -2489,12 +2556,12 @@ int open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->unused_bgs_lock);
        rwlock_init(&fs_info->tree_mod_log_lock);
        mutex_init(&fs_info->unused_bg_unpin_mutex);
+       mutex_init(&fs_info->delete_unused_bgs_mutex);
        mutex_init(&fs_info->reloc_mutex);
        mutex_init(&fs_info->delalloc_root_mutex);
+       mutex_init(&fs_info->cleaner_delayed_iput_mutex);
        seqlock_init(&fs_info->profiles_lock);
-       init_rwsem(&fs_info->delayed_iput_sem);
 
-       init_completion(&fs_info->kobj_unregister);
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
        INIT_LIST_HEAD(&fs_info->space_info);
        INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
@@ -2525,7 +2592,7 @@ int open_ctree(struct super_block *sb,
        fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
        fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
        /* readahead state */
-       INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
+       INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
        spin_lock_init(&fs_info->reada_lock);
 
        fs_info->thread_pool_size = min_t(unsigned long,
@@ -2567,7 +2634,6 @@ int open_ctree(struct super_block *sb,
 
 
        mutex_init(&fs_info->ordered_operations_mutex);
-       mutex_init(&fs_info->ordered_extent_flush_mutex);
        mutex_init(&fs_info->tree_log_mutex);
        mutex_init(&fs_info->chunk_mutex);
        mutex_init(&fs_info->transaction_kthread_mutex);
@@ -2607,8 +2673,8 @@ int open_ctree(struct super_block *sb,
         * Read super block and check the signature bytes only
         */
        bh = btrfs_read_dev_super(fs_devices->latest_bdev);
-       if (!bh) {
-               err = -EINVAL;
+       if (IS_ERR(bh)) {
+               err = PTR_ERR(bh);
                goto fail_alloc;
        }
 
@@ -2619,6 +2685,7 @@ int open_ctree(struct super_block *sb,
        if (btrfs_check_super_csum(bh->b_data)) {
                printk(KERN_ERR "BTRFS: superblock checksum mismatch\n");
                err = -EINVAL;
+               brelse(bh);
                goto fail_alloc;
        }
 
@@ -2797,10 +2864,13 @@ int open_ctree(struct super_block *sb,
        chunk_root->node = read_tree_block(chunk_root,
                                           btrfs_super_chunk_root(disk_super),
                                           generation);
-       if (!chunk_root->node ||
-           !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+       if (IS_ERR(chunk_root->node) ||
+           !extent_buffer_uptodate(chunk_root->node)) {
                printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
                       sb->s_id);
+               if (!IS_ERR(chunk_root->node))
+                       free_extent_buffer(chunk_root->node);
+               chunk_root->node = NULL;
                goto fail_tree_roots;
        }
        btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
@@ -2834,11 +2904,13 @@ retry_root_backup:
        tree_root->node = read_tree_block(tree_root,
                                          btrfs_super_root(disk_super),
                                          generation);
-       if (!tree_root->node ||
-           !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
+       if (IS_ERR(tree_root->node) ||
+           !extent_buffer_uptodate(tree_root->node)) {
                printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
                       sb->s_id);
-
+               if (!IS_ERR(tree_root->node))
+                       free_extent_buffer(tree_root->node);
+               tree_root->node = NULL;
                goto recovery_tree_root;
        }
 
@@ -2846,6 +2918,18 @@ retry_root_backup:
        tree_root->commit_root = btrfs_root_node(tree_root);
        btrfs_set_root_refs(&tree_root->root_item, 1);
 
+       mutex_lock(&tree_root->objectid_mutex);
+       ret = btrfs_find_highest_objectid(tree_root,
+                                       &tree_root->highest_objectid);
+       if (ret) {
+               mutex_unlock(&tree_root->objectid_mutex);
+               goto recovery_tree_root;
+       }
+
+       ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+       mutex_unlock(&tree_root->objectid_mutex);
+
        ret = btrfs_read_roots(fs_info, tree_root);
        if (ret)
                goto recovery_tree_root;
@@ -2874,12 +2958,24 @@ retry_root_backup:
 
        btrfs_close_extra_devices(fs_devices, 1);
 
-       ret = btrfs_sysfs_add_one(fs_info);
+       ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
        if (ret) {
-               pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
+               pr_err("BTRFS: failed to init sysfs fsid interface: %d\n", ret);
                goto fail_block_groups;
        }
 
+       ret = btrfs_sysfs_add_device(fs_devices);
+       if (ret) {
+               pr_err("BTRFS: failed to init sysfs device interface: %d\n", ret);
+               goto fail_fsdev_sysfs;
+       }
+
+       ret = btrfs_sysfs_add_mounted(fs_info);
+       if (ret) {
+               pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
+               goto fail_fsdev_sysfs;
+       }
+
        ret = btrfs_init_space_info(fs_info);
        if (ret) {
                printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret);
@@ -2896,8 +2992,9 @@ retry_root_backup:
        if (fs_info->fs_devices->missing_devices >
             fs_info->num_tolerated_disk_barrier_failures &&
            !(sb->s_flags & MS_RDONLY)) {
-               printk(KERN_WARNING "BTRFS: "
-                       "too many missing devices, writeable mount is not allowed\n");
+               pr_warn("BTRFS: missing devices(%llu) exceeds the limit(%d), writeable mount is not allowed\n",
+                       fs_info->fs_devices->missing_devices,
+                       fs_info->num_tolerated_disk_barrier_failures);
                goto fail_sysfs;
        }
 
@@ -3053,7 +3150,10 @@ fail_cleaner:
        filemap_write_and_wait(fs_info->btree_inode->i_mapping);
 
 fail_sysfs:
-       btrfs_sysfs_remove_one(fs_info);
+       btrfs_sysfs_remove_mounted(fs_info);
+
+fail_fsdev_sysfs:
+       btrfs_sysfs_remove_fsid(fs_info->fs_devices);
 
 fail_block_groups:
        btrfs_put_block_group_cache(fs_info);
@@ -3112,8 +3212,8 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
                struct btrfs_device *device = (struct btrfs_device *)
                        bh->b_private;
 
-               printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write due to "
-                                         "I/O error on %s\n",
+               btrfs_warn_rl_in_rcu(device->dev_root->fs_info,
+                               "lost page write due to IO error on %s",
                                          rcu_str_deref(device->name));
                /* note, we dont' set_buffer_write_io_error because we have
                 * our own ways of dealing with the IO errors
@@ -3125,6 +3225,37 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
        put_bh(bh);
 }
 
+int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
+                       struct buffer_head **bh_ret)
+{
+       struct buffer_head *bh;
+       struct btrfs_super_block *super;
+       u64 bytenr;
+
+       bytenr = btrfs_sb_offset(copy_num);
+       if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
+               return -EINVAL;
+
+       bh = __bread(bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE);
+       /*
+        * If we fail to read from the underlying devices, as of now
+        * the best option we have is to mark it EIO.
+        */
+       if (!bh)
+               return -EIO;
+
+       super = (struct btrfs_super_block *)bh->b_data;
+       if (btrfs_super_bytenr(super) != bytenr ||
+                   btrfs_super_magic(super) != BTRFS_MAGIC) {
+               brelse(bh);
+               return -EINVAL;
+       }
+
+       *bh_ret = bh;
+       return 0;
+}
+
+
 struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
 {
        struct buffer_head *bh;
@@ -3132,7 +3263,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
        struct btrfs_super_block *super;
        int i;
        u64 transid = 0;
-       u64 bytenr;
+       int ret = -EINVAL;
 
        /* we would like to check all the supers, but that would make
         * a btrfs mount succeed after a mkfs from a different FS.
@@ -3140,21 +3271,11 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
         * later supers, using BTRFS_SUPER_MIRROR_MAX instead
         */
        for (i = 0; i < 1; i++) {
-               bytenr = btrfs_sb_offset(i);
-               if (bytenr + BTRFS_SUPER_INFO_SIZE >=
-                                       i_size_read(bdev->bd_inode))
-                       break;
-               bh = __bread(bdev, bytenr / 4096,
-                                       BTRFS_SUPER_INFO_SIZE);
-               if (!bh)
+               ret = btrfs_read_dev_one_super(bdev, i, &bh);
+               if (ret)
                        continue;
 
                super = (struct btrfs_super_block *)bh->b_data;
-               if (btrfs_super_bytenr(super) != bytenr ||
-                   btrfs_super_magic(super) != BTRFS_MAGIC) {
-                       brelse(bh);
-                       continue;
-               }
 
                if (!latest || btrfs_super_generation(super) > transid) {
                        brelse(latest);
@@ -3164,6 +3285,10 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
                        brelse(bh);
                }
        }
+
+       if (!latest)
+               return ERR_PTR(ret);
+
        return latest;
 }
 
@@ -3232,8 +3357,9 @@ static int write_dev_supers(struct btrfs_device *device,
                        bh = __getblk(device->bdev, bytenr / 4096,
                                      BTRFS_SUPER_INFO_SIZE);
                        if (!bh) {
-                               printk(KERN_ERR "BTRFS: couldn't get super "
-                                      "buffer head for bytenr %Lu\n", bytenr);
+                               btrfs_err(device->dev_root->fs_info,
+                                   "couldn't get super buffer head for bytenr %llu",
+                                   bytenr);
                                errors++;
                                continue;
                        }
@@ -3267,13 +3393,8 @@ static int write_dev_supers(struct btrfs_device *device,
  * endio for the write_dev_flush, this will wake anyone waiting
  * for the barrier when it is done
  */
-static void btrfs_end_empty_barrier(struct bio *bio, int err)
+static void btrfs_end_empty_barrier(struct bio *bio)
 {
-       if (err) {
-               if (err == -EOPNOTSUPP)
-                       set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
-       }
        if (bio->bi_private)
                complete(bio->bi_private);
        bio_put(bio);
@@ -3301,12 +3422,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
 
                wait_for_completion(&device->flush_wait);
 
-               if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
-                       printk_in_rcu("BTRFS: disabling barriers on dev %s\n",
-                                     rcu_str_deref(device->name));
-                       device->nobarriers = 1;
-               } else if (!bio_flagged(bio, BIO_UPTODATE)) {
-                       ret = -EIO;
+               if (bio->bi_error) {
+                       ret = bio->bi_error;
                        btrfs_dev_stat_inc_and_print(device,
                                BTRFS_DEV_STAT_FLUSH_ERRS);
                }
@@ -3389,6 +3506,35 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
        return 0;
 }
 
+int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
+{
+       int raid_type;
+       int min_tolerated = INT_MAX;
+
+       if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 ||
+           (flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE))
+               min_tolerated = min(min_tolerated,
+                                   btrfs_raid_array[BTRFS_RAID_SINGLE].
+                                   tolerated_failures);
+
+       for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
+               if (raid_type == BTRFS_RAID_SINGLE)
+                       continue;
+               if (!(flags & btrfs_raid_group[raid_type]))
+                       continue;
+               min_tolerated = min(min_tolerated,
+                                   btrfs_raid_array[raid_type].
+                                   tolerated_failures);
+       }
+
+       if (min_tolerated == INT_MAX) {
+               pr_warn("BTRFS: unknown raid flag: %llu\n", flags);
+               min_tolerated = 0;
+       }
+
+       return min_tolerated;
+}
+
 int btrfs_calc_num_tolerated_disk_barrier_failures(
        struct btrfs_fs_info *fs_info)
 {
@@ -3398,13 +3544,12 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
                       BTRFS_BLOCK_GROUP_SYSTEM,
                       BTRFS_BLOCK_GROUP_METADATA,
                       BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
-       int num_types = 4;
        int i;
        int c;
        int num_tolerated_disk_barrier_failures =
                (int)fs_info->fs_devices->num_devices;
 
-       for (i = 0; i < num_types; i++) {
+       for (i = 0; i < ARRAY_SIZE(types); i++) {
                struct btrfs_space_info *tmp;
 
                sinfo = NULL;
@@ -3422,44 +3567,21 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
 
                down_read(&sinfo->groups_sem);
                for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
-                       if (!list_empty(&sinfo->block_groups[c])) {
-                               u64 flags;
-
-                               btrfs_get_block_group_info(
-                                       &sinfo->block_groups[c], &space);
-                               if (space.total_bytes == 0 ||
-                                   space.used_bytes == 0)
-                                       continue;
-                               flags = space.flags;
-                               /*
-                                * return
-                                * 0: if dup, single or RAID0 is configured for
-                                *    any of metadata, system or data, else
-                                * 1: if RAID5 is configured, or if RAID1 or
-                                *    RAID10 is configured and only two mirrors
-                                *    are used, else
-                                * 2: if RAID6 is configured, else
-                                * num_mirrors - 1: if RAID1 or RAID10 is
-                                *                  configured and more than
-                                *                  2 mirrors are used.
-                                */
-                               if (num_tolerated_disk_barrier_failures > 0 &&
-                                   ((flags & (BTRFS_BLOCK_GROUP_DUP |
-                                              BTRFS_BLOCK_GROUP_RAID0)) ||
-                                    ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
-                                     == 0)))
-                                       num_tolerated_disk_barrier_failures = 0;
-                               else if (num_tolerated_disk_barrier_failures > 1) {
-                                       if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
-                                           BTRFS_BLOCK_GROUP_RAID5 |
-                                           BTRFS_BLOCK_GROUP_RAID10)) {
-                                               num_tolerated_disk_barrier_failures = 1;
-                                       } else if (flags &
-                                                  BTRFS_BLOCK_GROUP_RAID6) {
-                                               num_tolerated_disk_barrier_failures = 2;
-                                       }
-                               }
-                       }
+                       u64 flags;
+
+                       if (list_empty(&sinfo->block_groups[c]))
+                               continue;
+
+                       btrfs_get_block_group_info(&sinfo->block_groups[c],
+                                                  &space);
+                       if (space.total_bytes == 0 || space.used_bytes == 0)
+                               continue;
+                       flags = space.flags;
+
+                       num_tolerated_disk_barrier_failures = min(
+                               num_tolerated_disk_barrier_failures,
+                               btrfs_get_num_tolerated_disk_barrier_failures(
+                                       flags));
                }
                up_read(&sinfo->groups_sem);
        }
@@ -3494,7 +3616,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
                if (ret) {
                        mutex_unlock(
                                &root->fs_info->fs_devices->device_list_mutex);
-                       btrfs_error(root->fs_info, ret,
+                       btrfs_std_error(root->fs_info, ret,
                                    "errors while submitting device barriers.");
                        return ret;
                }
@@ -3534,7 +3656,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
                /* FUA is masked off if unsupported and can't be the reason */
-               btrfs_error(root->fs_info, -EIO,
+               btrfs_std_error(root->fs_info, -EIO,
                            "%d errors while writing supers", total_errors);
                return -EIO;
        }
@@ -3552,7 +3674,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
        }
        mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
        if (total_errors > max_errors) {
-               btrfs_error(root->fs_info, -EIO,
+               btrfs_std_error(root->fs_info, -EIO,
                            "%d errors while writing supers", total_errors);
                return -EIO;
        }
@@ -3688,6 +3810,9 @@ void close_ctree(struct btrfs_root *root)
        fs_info->closing = 1;
        smp_mb();
 
+       /* wait for the qgroup rescan worker to stop */
+       btrfs_qgroup_wait_for_completion(fs_info);
+
        /* wait for the uuid_scan task to finish */
        down(&fs_info->uuid_tree_rescan_sem);
        /* avoid complains from lockdep et al., set sem back to initial state */
@@ -3710,6 +3835,13 @@ void close_ctree(struct btrfs_root *root)
        cancel_work_sync(&fs_info->async_reclaim_work);
 
        if (!(fs_info->sb->s_flags & MS_RDONLY)) {
+               /*
+                * If the cleaner thread is stopped and there are
+                * block groups queued for removal, the deletion will be
+                * skipped when we quit the cleaner thread.
+                */
+               btrfs_delete_unused_bgs(root->fs_info);
+
                ret = btrfs_commit_super(root);
                if (ret)
                        btrfs_err(fs_info, "commit super ret %d", ret);
@@ -3731,7 +3863,8 @@ void close_ctree(struct btrfs_root *root)
                       percpu_counter_sum(&fs_info->delalloc_bytes));
        }
 
-       btrfs_sysfs_remove_one(fs_info);
+       btrfs_sysfs_remove_mounted(fs_info);
+       btrfs_sysfs_remove_fsid(fs_info->fs_devices);
 
        btrfs_free_fs_roots(fs_info);
 
@@ -4060,6 +4193,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 
        while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
                struct btrfs_delayed_ref_head *head;
+               struct btrfs_delayed_ref_node *tmp;
                bool pin_bytes = false;
 
                head = rb_entry(node, struct btrfs_delayed_ref_head,
@@ -4075,11 +4209,10 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                        continue;
                }
                spin_lock(&head->lock);
-               while ((node = rb_first(&head->ref_root)) != NULL) {
-                       ref = rb_entry(node, struct btrfs_delayed_ref_node,
-                                      rb_node);
+               list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
+                                                list) {
                        ref->in_tree = 0;
-                       rb_erase(&ref->rb_node, &head->ref_root);
+                       list_del(&ref->list);
                        atomic_dec(&delayed_refs->num_entries);
                        btrfs_put_delayed_ref(ref);
                }
@@ -4228,25 +4361,6 @@ again:
        return 0;
 }
 
-static void btrfs_free_pending_ordered(struct btrfs_transaction *cur_trans,
-                                      struct btrfs_fs_info *fs_info)
-{
-       struct btrfs_ordered_extent *ordered;
-
-       spin_lock(&fs_info->trans_lock);
-       while (!list_empty(&cur_trans->pending_ordered)) {
-               ordered = list_first_entry(&cur_trans->pending_ordered,
-                                          struct btrfs_ordered_extent,
-                                          trans_list);
-               list_del_init(&ordered->trans_list);
-               spin_unlock(&fs_info->trans_lock);
-
-               btrfs_put_ordered_extent(ordered);
-               spin_lock(&fs_info->trans_lock);
-       }
-       spin_unlock(&fs_info->trans_lock);
-}
-
 void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
                                   struct btrfs_root *root)
 {
@@ -4258,7 +4372,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
        cur_trans->state = TRANS_STATE_UNBLOCKED;
        wake_up(&root->fs_info->transaction_wait);
 
-       btrfs_free_pending_ordered(cur_trans, root->fs_info);
        btrfs_destroy_delayed_inodes(root);
        btrfs_assert_delayed_root_empty(root);