These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / fs / btrfs / volumes.c
index 174f5e1..9c62a6f 100644 (file)
 #include "dev-replace.h"
 #include "sysfs.h"
 
+const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+       [BTRFS_RAID_RAID10] = {
+               .sub_stripes    = 2,
+               .dev_stripes    = 1,
+               .devs_max       = 0,    /* 0 == as many as possible */
+               .devs_min       = 4,
+               .tolerated_failures = 1,
+               .devs_increment = 2,
+               .ncopies        = 2,
+       },
+       [BTRFS_RAID_RAID1] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 2,
+               .devs_min       = 2,
+               .tolerated_failures = 1,
+               .devs_increment = 2,
+               .ncopies        = 2,
+       },
+       [BTRFS_RAID_DUP] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 2,
+               .devs_max       = 1,
+               .devs_min       = 1,
+               .tolerated_failures = 0,
+               .devs_increment = 1,
+               .ncopies        = 2,
+       },
+       [BTRFS_RAID_RAID0] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 0,
+               .devs_min       = 2,
+               .tolerated_failures = 0,
+               .devs_increment = 1,
+               .ncopies        = 1,
+       },
+       [BTRFS_RAID_SINGLE] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 1,
+               .devs_min       = 1,
+               .tolerated_failures = 0,
+               .devs_increment = 1,
+               .ncopies        = 1,
+       },
+       [BTRFS_RAID_RAID5] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 0,
+               .devs_min       = 2,
+               .tolerated_failures = 1,
+               .devs_increment = 1,
+               .ncopies        = 2,
+       },
+       [BTRFS_RAID_RAID6] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 0,
+               .devs_min       = 3,
+               .tolerated_failures = 2,
+               .devs_increment = 1,
+               .ncopies        = 3,
+       },
+};
+
+const u64 const btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
+       [BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
+       [BTRFS_RAID_RAID1]  = BTRFS_BLOCK_GROUP_RAID1,
+       [BTRFS_RAID_DUP]    = BTRFS_BLOCK_GROUP_DUP,
+       [BTRFS_RAID_RAID0]  = BTRFS_BLOCK_GROUP_RAID0,
+       [BTRFS_RAID_SINGLE] = 0,
+       [BTRFS_RAID_RAID5]  = BTRFS_BLOCK_GROUP_RAID5,
+       [BTRFS_RAID_RAID6]  = BTRFS_BLOCK_GROUP_RAID6,
+};
+
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                struct btrfs_device *device);
@@ -52,6 +128,10 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
 
 DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
+struct list_head *btrfs_get_fs_uuids(void)
+{
+       return &fs_uuids;
+}
 
 static struct btrfs_fs_devices *__alloc_fs_devices(void)
 {
@@ -152,8 +232,9 @@ static struct btrfs_device *__alloc_device(void)
        spin_lock_init(&dev->reada_lock);
        atomic_set(&dev->reada_in_flight, 0);
        atomic_set(&dev->dev_stats_ccnt, 0);
-       INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
-       INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
+       btrfs_device_data_ordered_init(dev);
+       INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+       INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 
        return dev;
 }
@@ -194,7 +275,6 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
 
        if (IS_ERR(*bdev)) {
                ret = PTR_ERR(*bdev);
-               printk(KERN_INFO "BTRFS: open %s failed\n", device_path);
                goto error;
        }
 
@@ -207,8 +287,8 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
        }
        invalidate_bdev(*bdev);
        *bh = btrfs_read_dev_super(*bdev);
-       if (!*bh) {
-               ret = -EINVAL;
+       if (IS_ERR(*bh)) {
+               ret = PTR_ERR(*bh);
                blkdev_put(*bdev, flags);
                goto error;
        }
@@ -341,11 +421,14 @@ loop_lock:
                pending = pending->bi_next;
                cur->bi_next = NULL;
 
+               /*
+                * atomic_dec_return implies a barrier for waitqueue_active
+                */
                if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
                    waitqueue_active(&fs_info->async_submit_wait))
                        wake_up(&fs_info->async_submit_wait);
 
-               BUG_ON(atomic_read(&cur->bi_cnt) == 0);
+               BUG_ON(atomic_read(&cur->__bi_cnt) == 0);
 
                /*
                 * if we're doing the sync list, record that our
@@ -441,6 +524,61 @@ static void pending_bios_fn(struct btrfs_work *work)
        run_scheduled_bios(device);
 }
 
+
+void btrfs_free_stale_device(struct btrfs_device *cur_dev)
+{
+       struct btrfs_fs_devices *fs_devs;
+       struct btrfs_device *dev;
+
+       if (!cur_dev->name)
+               return;
+
+       list_for_each_entry(fs_devs, &fs_uuids, list) {
+               int del = 1;
+
+               if (fs_devs->opened)
+                       continue;
+               if (fs_devs->seeding)
+                       continue;
+
+               list_for_each_entry(dev, &fs_devs->devices, dev_list) {
+
+                       if (dev == cur_dev)
+                               continue;
+                       if (!dev->name)
+                               continue;
+
+                       /*
+                        * Todo: This won't be enough. What if the same device
+                        * comes back (with new uuid and) with its mapper path?
+                        * But for now, this does help as mostly an admin will
+                        * either use mapper or non mapper path throughout.
+                        */
+                       rcu_read_lock();
+                       del = strcmp(rcu_str_deref(dev->name),
+                                               rcu_str_deref(cur_dev->name));
+                       rcu_read_unlock();
+                       if (!del)
+                               break;
+               }
+
+               if (!del) {
+                       /* delete the stale device */
+                       if (fs_devs->num_devices == 1) {
+                               btrfs_sysfs_remove_fsid(fs_devs);
+                               list_del(&fs_devs->list);
+                               free_fs_devices(fs_devs);
+                       } else {
+                               fs_devs->num_devices--;
+                               list_del(&dev->dev_list);
+                               rcu_string_free(dev->name);
+                               kfree(dev);
+                       }
+                       break;
+               }
+       }
+}
+
 /*
  * Add new device to list of registered devices
  *
@@ -556,6 +694,12 @@ static noinline int device_list_add(const char *path,
        if (!fs_devices->opened)
                device->generation = found_transid;
 
+       /*
+        * if there is new btrfs on an already registered device,
+        * then remove the stale device entry.
+        */
+       btrfs_free_stale_device(device);
+
        *fs_devices_ret = fs_devices;
 
        return ret;
@@ -693,43 +837,14 @@ static void free_device(struct rcu_head *head)
 
 static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 {
-       struct btrfs_device *device;
+       struct btrfs_device *device, *tmp;
 
        if (--fs_devices->opened > 0)
                return 0;
 
        mutex_lock(&fs_devices->device_list_mutex);
-       list_for_each_entry(device, &fs_devices->devices, dev_list) {
-               struct btrfs_device *new_device;
-               struct rcu_string *name;
-
-               if (device->bdev)
-                       fs_devices->open_devices--;
-
-               if (device->writeable &&
-                   device->devid != BTRFS_DEV_REPLACE_DEVID) {
-                       list_del_init(&device->dev_alloc_list);
-                       fs_devices->rw_devices--;
-               }
-
-               if (device->missing)
-                       fs_devices->missing_devices--;
-
-               new_device = btrfs_alloc_device(NULL, &device->devid,
-                                               device->uuid);
-               BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
-
-               /* Safe because we are under uuid_mutex */
-               if (device->name) {
-                       name = rcu_string_strdup(device->name->str, GFP_NOFS);
-                       BUG_ON(!name); /* -ENOMEM */
-                       rcu_assign_pointer(new_device->name, name);
-               }
-
-               list_replace_rcu(&device->dev_list, &new_device->dev_list);
-               new_device->fs_devices = device->fs_devices;
-
-               call_rcu(&device->rcu, free_device);
+       list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
+               btrfs_close_one_device(device);
        }
        mutex_unlock(&fs_devices->device_list_mutex);
 
@@ -1051,15 +1166,18 @@ out:
        return ret;
 }
 
-static int contains_pending_extent(struct btrfs_trans_handle *trans,
+static int contains_pending_extent(struct btrfs_transaction *transaction,
                                   struct btrfs_device *device,
                                   u64 *start, u64 len)
 {
+       struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
        struct extent_map *em;
-       struct list_head *search_list = &trans->transaction->pending_chunks;
+       struct list_head *search_list = &fs_info->pinned_chunks;
        int ret = 0;
        u64 physical_start = *start;
 
+       if (transaction)
+               search_list = &transaction->pending_chunks;
 again:
        list_for_each_entry(em, search_list, list) {
                struct map_lookup *map;
@@ -1067,19 +1185,35 @@ again:
 
                map = (struct map_lookup *)em->bdev;
                for (i = 0; i < map->num_stripes; i++) {
+                       u64 end;
+
                        if (map->stripes[i].dev != device)
                                continue;
                        if (map->stripes[i].physical >= physical_start + len ||
                            map->stripes[i].physical + em->orig_block_len <=
                            physical_start)
                                continue;
-                       *start = map->stripes[i].physical +
-                               em->orig_block_len;
-                       ret = 1;
+                       /*
+                        * Make sure that while processing the pinned list we do
+                        * not override our *start with a lower value, because
+                        * we can have pinned chunks that fall within this
+                        * device hole and that have lower physical addresses
+                        * than the pending chunks we processed before. If we
+                        * do not take this special care we can end up getting
+                        * 2 pending chunks that start at the same physical
+                        * device offsets because the end offset of a pinned
+                        * chunk can be equal to the start offset of some
+                        * pending chunk.
+                        */
+                       end = map->stripes[i].physical + em->orig_block_len;
+                       if (end > *start) {
+                               *start = end;
+                               ret = 1;
+                       }
                }
        }
-       if (search_list == &trans->transaction->pending_chunks) {
-               search_list = &trans->root->fs_info->pinned_chunks;
+       if (search_list != &fs_info->pinned_chunks) {
+               search_list = &fs_info->pinned_chunks;
                goto again;
        }
 
@@ -1088,12 +1222,13 @@ again:
 
 
 /*
- * find_free_dev_extent - find free space in the specified device
- * @device:    the device which we search the free space in
- * @num_bytes: the size of the free space that we need
- * @start:     store the start of the free space.
- * @len:       the size of the free space. that we find, or the size of the max
- *             free space if we don't find suitable free space
+ * find_free_dev_extent_start - find free space in the specified device
+ * @device:      the device which we search the free space in
+ * @num_bytes:   the size of the free space that we need
+ * @search_start: the position from which to begin the search
+ * @start:       store the start of the free space.
+ * @len:         the size of the free space. that we find, or the size
+ *               of the max free space if we don't find suitable free space
  *
  * this uses a pretty simple search, the expectation is that it is
  * called very infrequently and that a given device has a small number
@@ -1107,9 +1242,9 @@ again:
  * But if we don't find suitable free space, it is used to store the size of
  * the max free space.
  */
-int find_free_dev_extent(struct btrfs_trans_handle *trans,
-                        struct btrfs_device *device, u64 num_bytes,
-                        u64 *start, u64 *len)
+int find_free_dev_extent_start(struct btrfs_transaction *transaction,
+                              struct btrfs_device *device, u64 num_bytes,
+                              u64 search_start, u64 *start, u64 *len)
 {
        struct btrfs_key key;
        struct btrfs_root *root = device->dev_root;
@@ -1119,18 +1254,19 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
        u64 max_hole_start;
        u64 max_hole_size;
        u64 extent_end;
-       u64 search_start;
        u64 search_end = device->total_bytes;
        int ret;
        int slot;
        struct extent_buffer *l;
+       u64 min_search_start;
 
-       /* FIXME use last free of some kind */
-
-       /* we don't want to overwrite the superblock on the drive,
-        * so we make sure to start at an offset of at least 1MB
+       /*
+        * We don't want to overwrite the superblock on the drive nor any area
+        * used by the boot loader (grub for example), so we make sure to start
+        * at an offset of at least 1MB.
         */
-       search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
+       min_search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
+       search_start = max(search_start, min_search_start);
 
        path = btrfs_alloc_path();
        if (!path)
@@ -1192,7 +1328,7 @@ again:
                         * Have to check before we set max_hole_start, otherwise
                         * we could end up sending back this offset anyway.
                         */
-                       if (contains_pending_extent(trans, device,
+                       if (contains_pending_extent(transaction, device,
                                                    &search_start,
                                                    hole_size)) {
                                if (key.offset >= search_start) {
@@ -1241,7 +1377,7 @@ next:
        if (search_end > search_start) {
                hole_size = search_end - search_start;
 
-               if (contains_pending_extent(trans, device, &search_start,
+               if (contains_pending_extent(transaction, device, &search_start,
                                            hole_size)) {
                        btrfs_release_path(path);
                        goto again;
@@ -1267,6 +1403,15 @@ out:
        return ret;
 }
 
+int find_free_dev_extent(struct btrfs_trans_handle *trans,
+                        struct btrfs_device *device, u64 num_bytes,
+                        u64 *start, u64 *len)
+{
+       /* FIXME use last free of some kind */
+       return find_free_dev_extent_start(trans->transaction, device,
+                                         num_bytes, 0, start, len);
+}
+
 static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
                          struct btrfs_device *device,
                          u64 start, u64 *dev_extent_len)
@@ -1307,7 +1452,7 @@ again:
                extent = btrfs_item_ptr(leaf, path->slots[0],
                                        struct btrfs_dev_extent);
        } else {
-               btrfs_error(root->fs_info, ret, "Slot search failed");
+               btrfs_std_error(root->fs_info, ret, "Slot search failed");
                goto out;
        }
 
@@ -1315,10 +1460,10 @@ again:
 
        ret = btrfs_del_item(trans, root, path);
        if (ret) {
-               btrfs_error(root->fs_info, ret,
+               btrfs_std_error(root->fs_info, ret,
                            "Failed to remove dev extent item");
        } else {
-               trans->transaction->have_free_bgs = 1;
+               set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
        }
 out:
        btrfs_free_path(path);
@@ -1706,7 +1851,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        if (device->bdev) {
                device->fs_devices->open_devices--;
                /* remove sysfs entry */
-               btrfs_kobj_rm_device(root->fs_info, device);
+               btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
        }
 
        call_rcu(&device->rcu, free_device);
@@ -1829,7 +1974,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
        if (srcdev->writeable) {
                fs_devices->rw_devices--;
                /* zero out the old super if it is writable */
-               btrfs_scratch_superblock(srcdev);
+               btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
        }
 
        if (srcdev->bdev)
@@ -1875,8 +2020,11 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
        mutex_lock(&uuid_mutex);
        WARN_ON(!tgtdev);
        mutex_lock(&fs_info->fs_devices->device_list_mutex);
+
+       btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
+
        if (tgtdev->bdev) {
-               btrfs_scratch_superblock(tgtdev);
+               btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
                fs_info->fs_devices->open_devices--;
        }
        fs_info->fs_devices->num_devices--;
@@ -1943,10 +2091,8 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
                        }
                }
 
-               if (!*device) {
-                       btrfs_err(root->fs_info, "no missing device found");
-                       return -ENOENT;
-               }
+               if (!*device)
+                       return BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
 
                return 0;
        } else {
@@ -2211,7 +2357,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
                                    tmp + 1);
 
        /* add sysfs device entry */
-       btrfs_kobj_add_device(root->fs_info, device);
+       btrfs_sysfs_add_device_link(root->fs_info->fs_devices, device);
 
        /*
         * we've got more storage, clear any full flags on the space
@@ -2252,8 +2398,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
                 */
                snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
                                                root->fs_info->fsid);
-               if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
-                       goto error_trans;
+               if (kobject_rename(&root->fs_info->fs_devices->fsid_kobj,
+                                                               fsid_buf))
+                       btrfs_warn(root->fs_info,
+                               "sysfs: failed to create fsid for sprout");
        }
 
        root->fs_info->num_tolerated_disk_barrier_failures =
@@ -2269,7 +2417,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 
                ret = btrfs_relocate_sys_chunks(root);
                if (ret < 0)
-                       btrfs_error(root->fs_info, ret,
+                       btrfs_std_error(root->fs_info, ret,
                                    "Failed to relocate sys chunks after "
                                    "device initialization. This can be fixed "
                                    "using the \"btrfs balance\" command.");
@@ -2289,7 +2437,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 error_trans:
        btrfs_end_transaction(trans, root);
        rcu_string_free(device->name);
-       btrfs_kobj_rm_device(root->fs_info, device);
+       btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
        kfree(device);
 error:
        blkdev_put(bdev, FMODE_EXCL);
@@ -2514,7 +2662,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
        if (ret < 0)
                goto out;
        else if (ret > 0) { /* Logic error or corruption */
-               btrfs_error(root->fs_info, -ENOENT,
+               btrfs_std_error(root->fs_info, -ENOENT,
                            "Failed lookup while freeing chunk.");
                ret = -ENOENT;
                goto out;
@@ -2522,7 +2670,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
 
        ret = btrfs_del_item(trans, root, path);
        if (ret < 0)
-               btrfs_error(root->fs_info, ret,
+               btrfs_std_error(root->fs_info, ret,
                            "Failed to delete chunk item.");
 out:
        btrfs_free_path(path);
@@ -2609,6 +2757,9 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                return -EINVAL;
        }
        map = (struct map_lookup *)em->bdev;
+       lock_chunks(root->fs_info->chunk_root);
+       check_system_chunk(trans, extent_root, map->type);
+       unlock_chunks(root->fs_info->chunk_root);
 
        for (i = 0; i < map->num_stripes; i++) {
                struct btrfs_device *device = map->stripes[i].dev;
@@ -2667,9 +2818,7 @@ out:
        return ret;
 }
 
-static int btrfs_relocate_chunk(struct btrfs_root *root,
-                               u64 chunk_objectid,
-                               u64 chunk_offset)
+static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
 {
        struct btrfs_root *extent_root;
        struct btrfs_trans_handle *trans;
@@ -2678,19 +2827,36 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
        root = root->fs_info->chunk_root;
        extent_root = root->fs_info->extent_root;
 
+       /*
+        * Prevent races with automatic removal of unused block groups.
+        * After we relocate and before we remove the chunk with offset
+        * chunk_offset, automatic removal of the block group can kick in,
+        * resulting in a failure when calling btrfs_remove_chunk() below.
+        *
+        * Make sure to acquire this mutex before doing a tree search (dev
+        * or chunk trees) to find chunks. Otherwise the cleaner kthread might
+        * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after
+        * we release the path used to search the chunk/dev tree and before
+        * the current task acquires this mutex and calls us.
+        */
+       ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex));
+
        ret = btrfs_can_relocate(extent_root, chunk_offset);
        if (ret)
                return -ENOSPC;
 
        /* step one, relocate all the extents inside this chunk */
+       btrfs_scrub_pause(root);
        ret = btrfs_relocate_block_group(extent_root, chunk_offset);
+       btrfs_scrub_continue(root);
        if (ret)
                return ret;
 
-       trans = btrfs_start_transaction(root, 0);
+       trans = btrfs_start_trans_remove_block_group(root->fs_info,
+                                                    chunk_offset);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               btrfs_std_error(root->fs_info, ret);
+               btrfs_std_error(root->fs_info, ret, NULL);
                return ret;
        }
 
@@ -2726,13 +2892,18 @@ again:
        key.type = BTRFS_CHUNK_ITEM_KEY;
 
        while (1) {
+               mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
                ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
-               if (ret < 0)
+               if (ret < 0) {
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                        goto error;
+               }
                BUG_ON(ret == 0); /* Corruption */
 
                ret = btrfs_previous_item(chunk_root, path, key.objectid,
                                          key.type);
+               if (ret)
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                if (ret < 0)
                        goto error;
                if (ret > 0)
@@ -2748,13 +2919,13 @@ again:
 
                if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
                        ret = btrfs_relocate_chunk(chunk_root,
-                                                  found_key.objectid,
                                                   found_key.offset);
                        if (ret == -ENOSPC)
                                failed++;
                        else
                                BUG_ON(ret);
                }
+               mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 
                if (found_key.offset == 0)
                        break;
@@ -2888,16 +3059,19 @@ static void update_balance_args(struct btrfs_balance_control *bctl)
         * (albeit full) chunks.
         */
        if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
+           !(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
            !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
                bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
                bctl->data.usage = 90;
        }
        if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
+           !(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
            !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
                bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
                bctl->sys.usage = 90;
        }
        if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
+           !(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
            !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
                bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
                bctl->meta.usage = 90;
@@ -2949,8 +3123,41 @@ static int chunk_profiles_filter(u64 chunk_type,
        return 1;
 }
 
-static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
                              struct btrfs_balance_args *bargs)
+{
+       struct btrfs_block_group_cache *cache;
+       u64 chunk_used;
+       u64 user_thresh_min;
+       u64 user_thresh_max;
+       int ret = 1;
+
+       cache = btrfs_lookup_block_group(fs_info, chunk_offset);
+       chunk_used = btrfs_block_group_used(&cache->item);
+
+       if (bargs->usage_min == 0)
+               user_thresh_min = 0;
+       else
+               user_thresh_min = div_factor_fine(cache->key.offset,
+                                       bargs->usage_min);
+
+       if (bargs->usage_max == 0)
+               user_thresh_max = 1;
+       else if (bargs->usage_max > 100)
+               user_thresh_max = cache->key.offset;
+       else
+               user_thresh_max = div_factor_fine(cache->key.offset,
+                                       bargs->usage_max);
+
+       if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
+               ret = 0;
+
+       btrfs_put_block_group(cache);
+       return ret;
+}
+
+static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
+               u64 chunk_offset, struct btrfs_balance_args *bargs)
 {
        struct btrfs_block_group_cache *cache;
        u64 chunk_used, user_thresh;
@@ -2959,7 +3166,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
        cache = btrfs_lookup_block_group(fs_info, chunk_offset);
        chunk_used = btrfs_block_group_used(&cache->item);
 
-       if (bargs->usage == 0)
+       if (bargs->usage_min == 0)
                user_thresh = 1;
        else if (bargs->usage > 100)
                user_thresh = cache->key.offset;
@@ -3049,6 +3256,19 @@ static int chunk_vrange_filter(struct extent_buffer *leaf,
        return 1;
 }
 
+static int chunk_stripes_range_filter(struct extent_buffer *leaf,
+                              struct btrfs_chunk *chunk,
+                              struct btrfs_balance_args *bargs)
+{
+       int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+
+       if (bargs->stripes_min <= num_stripes
+                       && num_stripes <= bargs->stripes_max)
+               return 0;
+
+       return 1;
+}
+
 static int chunk_soft_convert_filter(u64 chunk_type,
                                     struct btrfs_balance_args *bargs)
 {
@@ -3095,6 +3315,9 @@ static int should_balance_chunk(struct btrfs_root *root,
        if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
            chunk_usage_filter(bctl->fs_info, chunk_offset, bargs)) {
                return 0;
+       } else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
+           chunk_usage_range_filter(bctl->fs_info, chunk_offset, bargs)) {
+               return 0;
        }
 
        /* devid filter */
@@ -3115,6 +3338,12 @@ static int should_balance_chunk(struct btrfs_root *root,
                return 0;
        }
 
+       /* stripes filter */
+       if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
+           chunk_stripes_range_filter(leaf, chunk, bargs)) {
+               return 0;
+       }
+
        /* soft profile changing mode */
        if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
            chunk_soft_convert_filter(chunk_type, bargs)) {
@@ -3129,6 +3358,16 @@ static int should_balance_chunk(struct btrfs_root *root,
                        return 0;
                else
                        bargs->limit--;
+       } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
+               /*
+                * Same logic as the 'limit' filter; the minimum cannot be
+                * determined here because we do not have the global informatoin
+                * about the count of all chunks that satisfy the filters.
+                */
+               if (bargs->limit_max == 0)
+                       return 0;
+               else
+                       bargs->limit_max--;
        }
 
        return 1;
@@ -3143,6 +3382,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
        struct btrfs_device *device;
        u64 old_size;
        u64 size_to_free;
+       u64 chunk_type;
        struct btrfs_chunk *chunk;
        struct btrfs_path *path;
        struct btrfs_key key;
@@ -3153,9 +3393,14 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
        int ret;
        int enospc_errors = 0;
        bool counting = true;
+       /* The single value limit and min/max limits use the same bytes in the */
        u64 limit_data = bctl->data.limit;
        u64 limit_meta = bctl->meta.limit;
        u64 limit_sys = bctl->sys.limit;
+       u32 count_data = 0;
+       u32 count_meta = 0;
+       u32 count_sys = 0;
+       int chunk_reserved = 0;
 
        /* step one make some room on all the devices */
        devices = &fs_info->fs_devices->devices;
@@ -3196,6 +3441,10 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
        spin_unlock(&fs_info->balance_lock);
 again:
        if (!counting) {
+               /*
+                * The single value limit and min/max limits use the same bytes
+                * in the
+                */
                bctl->data.limit = limit_data;
                bctl->meta.limit = limit_meta;
                bctl->sys.limit = limit_sys;
@@ -3211,9 +3460,12 @@ again:
                        goto error;
                }
 
+               mutex_lock(&fs_info->delete_unused_bgs_mutex);
                ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
-               if (ret < 0)
+               if (ret < 0) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        goto error;
+               }
 
                /*
                 * this shouldn't happen, it means the last relocate
@@ -3225,6 +3477,7 @@ again:
                ret = btrfs_previous_item(chunk_root, path, 0,
                                          BTRFS_CHUNK_ITEM_KEY);
                if (ret) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        ret = 0;
                        break;
                }
@@ -3233,10 +3486,13 @@ again:
                slot = path->slots[0];
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
-               if (found_key.objectid != key.objectid)
+               if (found_key.objectid != key.objectid) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        break;
+               }
 
                chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+               chunk_type = btrfs_chunk_type(leaf, chunk);
 
                if (!counting) {
                        spin_lock(&fs_info->balance_lock);
@@ -3246,20 +3502,64 @@ again:
 
                ret = should_balance_chunk(chunk_root, leaf, chunk,
                                           found_key.offset);
+
                btrfs_release_path(path);
-               if (!ret)
+               if (!ret) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        goto loop;
+               }
 
                if (counting) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        spin_lock(&fs_info->balance_lock);
                        bctl->stat.expected++;
                        spin_unlock(&fs_info->balance_lock);
+
+                       if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
+                               count_data++;
+                       else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
+                               count_sys++;
+                       else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
+                               count_meta++;
+
+                       goto loop;
+               }
+
+               /*
+                * Apply limit_min filter, no need to check if the LIMITS
+                * filter is used, limit_min is 0 by default
+                */
+               if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
+                                       count_data < bctl->data.limit_min)
+                               || ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
+                                       count_meta < bctl->meta.limit_min)
+                               || ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+                                       count_sys < bctl->sys.limit_min)) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        goto loop;
                }
 
+               if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) {
+                       trans = btrfs_start_transaction(chunk_root, 0);
+                       if (IS_ERR(trans)) {
+                               mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+                               ret = PTR_ERR(trans);
+                               goto error;
+                       }
+
+                       ret = btrfs_force_chunk_alloc(trans, chunk_root,
+                                                     BTRFS_BLOCK_GROUP_DATA);
+                       btrfs_end_transaction(trans, chunk_root);
+                       if (ret < 0) {
+                               mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+                               goto error;
+                       }
+                       chunk_reserved = 1;
+               }
+
                ret = btrfs_relocate_chunk(chunk_root,
-                                          found_key.objectid,
                                           found_key.offset);
+               mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                if (ret && ret != -ENOSPC)
                        goto error;
                if (ret == -ENOSPC) {
@@ -3331,11 +3631,20 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
        unset_balance_control(fs_info);
        ret = del_balance_item(fs_info->tree_root);
        if (ret)
-               btrfs_std_error(fs_info, ret);
+               btrfs_std_error(fs_info, ret, NULL);
 
        atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
 }
 
+/* Non-zero return value signifies invalidity */
+static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
+               u64 allowed)
+{
+       return ((bctl_arg->flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+               (!alloc_profile_is_valid(bctl_arg->target, 1) ||
+                (bctl_arg->target & ~allowed)));
+}
+
 /*
  * Should be called with both balance and volume mutexes held
  */
@@ -3393,27 +3702,21 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        if (num_devices > 3)
                allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
                            BTRFS_BLOCK_GROUP_RAID6);
-       if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-           (!alloc_profile_is_valid(bctl->data.target, 1) ||
-            (bctl->data.target & ~allowed))) {
+       if (validate_convert_profile(&bctl->data, allowed)) {
                btrfs_err(fs_info, "unable to start balance with target "
                           "data profile %llu",
                       bctl->data.target);
                ret = -EINVAL;
                goto out;
        }
-       if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-           (!alloc_profile_is_valid(bctl->meta.target, 1) ||
-            (bctl->meta.target & ~allowed))) {
+       if (validate_convert_profile(&bctl->meta, allowed)) {
                btrfs_err(fs_info,
                           "unable to start balance with target metadata profile %llu",
                       bctl->meta.target);
                ret = -EINVAL;
                goto out;
        }
-       if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-           (!alloc_profile_is_valid(bctl->sys.target, 1) ||
-            (bctl->sys.target & ~allowed))) {
+       if (validate_convert_profile(&bctl->sys, allowed)) {
                btrfs_err(fs_info,
                           "unable to start balance with target system profile %llu",
                       bctl->sys.target);
@@ -3455,23 +3758,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        } while (read_seqretry(&fs_info->profiles_lock, seq));
 
        if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
-               int num_tolerated_disk_barrier_failures;
-               u64 target = bctl->sys.target;
-
-               num_tolerated_disk_barrier_failures =
-                       btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
-               if (num_tolerated_disk_barrier_failures > 0 &&
-                   (target &
-                    (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 |
-                     BTRFS_AVAIL_ALLOC_BIT_SINGLE)))
-                       num_tolerated_disk_barrier_failures = 0;
-               else if (num_tolerated_disk_barrier_failures > 1 &&
-                        (target &
-                         (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)))
-                       num_tolerated_disk_barrier_failures = 1;
-
-               fs_info->num_tolerated_disk_barrier_failures =
-                       num_tolerated_disk_barrier_failures;
+               fs_info->num_tolerated_disk_barrier_failures = min(
+                       btrfs_calc_num_tolerated_disk_barrier_failures(fs_info),
+                       btrfs_get_num_tolerated_disk_barrier_failures(
+                               bctl->sys.target));
        }
 
        ret = insert_balance_item(fs_info->tree_root, bctl);
@@ -3908,9 +4198,9 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
        uuid_root = btrfs_create_tree(trans, fs_info,
                                      BTRFS_UUID_TREE_OBJECTID);
        if (IS_ERR(uuid_root)) {
-               btrfs_abort_transaction(trans, tree_root,
-                                       PTR_ERR(uuid_root));
-               return PTR_ERR(uuid_root);
+               ret = PTR_ERR(uuid_root);
+               btrfs_abort_transaction(trans, tree_root, ret);
+               return ret;
        }
 
        fs_info->uuid_root = uuid_root;
@@ -3959,12 +4249,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        struct btrfs_dev_extent *dev_extent = NULL;
        struct btrfs_path *path;
        u64 length;
-       u64 chunk_objectid;
        u64 chunk_offset;
        int ret;
        int slot;
        int failed = 0;
        bool retried = false;
+       bool checked_pending_chunks = false;
        struct extent_buffer *l;
        struct btrfs_key key;
        struct btrfs_super_block *super_copy = root->fs_info->super_copy;
@@ -3998,11 +4288,16 @@ again:
        key.type = BTRFS_DEV_EXTENT_KEY;
 
        do {
+               mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-               if (ret < 0)
+               if (ret < 0) {
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                        goto done;
+               }
 
                ret = btrfs_previous_item(root, path, 0, key.type);
+               if (ret)
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                if (ret < 0)
                        goto done;
                if (ret) {
@@ -4016,6 +4311,7 @@ again:
                btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 
                if (key.objectid != device->devid) {
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                        btrfs_release_path(path);
                        break;
                }
@@ -4024,15 +4320,16 @@ again:
                length = btrfs_dev_extent_length(l, dev_extent);
 
                if (key.offset + length <= new_size) {
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                        btrfs_release_path(path);
                        break;
                }
 
-               chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
                chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
                btrfs_release_path(path);
 
-               ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
+               ret = btrfs_relocate_chunk(root, chunk_offset);
+               mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                if (ret && ret != -ENOSPC)
                        goto done;
                if (ret == -ENOSPC)
@@ -4045,15 +4342,6 @@ again:
                goto again;
        } else if (failed && retried) {
                ret = -ENOSPC;
-               lock_chunks(root);
-
-               btrfs_device_set_total_bytes(device, old_size);
-               if (device->writeable)
-                       device->fs_devices->total_rw_bytes += diff;
-               spin_lock(&root->fs_info->free_chunk_lock);
-               root->fs_info->free_chunk_space += diff;
-               spin_unlock(&root->fs_info->free_chunk_lock);
-               unlock_chunks(root);
                goto done;
        }
 
@@ -4065,6 +4353,36 @@ again:
        }
 
        lock_chunks(root);
+
+       /*
+        * We checked in the above loop all device extents that were already in
+        * the device tree. However before we have updated the device's
+        * total_bytes to the new size, we might have had chunk allocations that
+        * have not complete yet (new block groups attached to transaction
+        * handles), and therefore their device extents were not yet in the
+        * device tree and we missed them in the loop above. So if we have any
+        * pending chunk using a device extent that overlaps the device range
+        * that we can not use anymore, commit the current transaction and
+        * repeat the search on the device tree - this way we guarantee we will
+        * not have chunks using device extents that end beyond 'new_size'.
+        */
+       if (!checked_pending_chunks) {
+               u64 start = new_size;
+               u64 len = old_size - new_size;
+
+               if (contains_pending_extent(trans->transaction, device,
+                                           &start, len)) {
+                       unlock_chunks(root);
+                       checked_pending_chunks = true;
+                       failed = 0;
+                       retried = false;
+                       ret = btrfs_commit_transaction(trans, root);
+                       if (ret)
+                               goto done;
+                       goto again;
+               }
+       }
+
        btrfs_device_set_disk_total_bytes(device, new_size);
        if (list_empty(&device->resized_list))
                list_add_tail(&device->resized_list,
@@ -4079,6 +4397,16 @@ again:
        btrfs_end_transaction(trans, root);
 done:
        btrfs_free_path(path);
+       if (ret) {
+               lock_chunks(root);
+               btrfs_device_set_total_bytes(device, old_size);
+               if (device->writeable)
+                       device->fs_devices->total_rw_bytes += diff;
+               spin_lock(&root->fs_info->free_chunk_lock);
+               root->fs_info->free_chunk_space += diff;
+               spin_unlock(&root->fs_info->free_chunk_lock);
+               unlock_chunks(root);
+       }
        return ret;
 }
 
@@ -4130,65 +4458,6 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
        return 0;
 }
 
-static const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
-       [BTRFS_RAID_RAID10] = {
-               .sub_stripes    = 2,
-               .dev_stripes    = 1,
-               .devs_max       = 0,    /* 0 == as many as possible */
-               .devs_min       = 4,
-               .devs_increment = 2,
-               .ncopies        = 2,
-       },
-       [BTRFS_RAID_RAID1] = {
-               .sub_stripes    = 1,
-               .dev_stripes    = 1,
-               .devs_max       = 2,
-               .devs_min       = 2,
-               .devs_increment = 2,
-               .ncopies        = 2,
-       },
-       [BTRFS_RAID_DUP] = {
-               .sub_stripes    = 1,
-               .dev_stripes    = 2,
-               .devs_max       = 1,
-               .devs_min       = 1,
-               .devs_increment = 1,
-               .ncopies        = 2,
-       },
-       [BTRFS_RAID_RAID0] = {
-               .sub_stripes    = 1,
-               .dev_stripes    = 1,
-               .devs_max       = 0,
-               .devs_min       = 2,
-               .devs_increment = 1,
-               .ncopies        = 1,
-       },
-       [BTRFS_RAID_SINGLE] = {
-               .sub_stripes    = 1,
-               .dev_stripes    = 1,
-               .devs_max       = 1,
-               .devs_min       = 1,
-               .devs_increment = 1,
-               .ncopies        = 1,
-       },
-       [BTRFS_RAID_RAID5] = {
-               .sub_stripes    = 1,
-               .dev_stripes    = 1,
-               .devs_max       = 0,
-               .devs_min       = 2,
-               .devs_increment = 1,
-               .ncopies        = 2,
-       },
-       [BTRFS_RAID_RAID6] = {
-               .sub_stripes    = 1,
-               .dev_stripes    = 1,
-               .devs_max       = 0,
-               .devs_min       = 3,
-               .devs_increment = 1,
-               .ncopies        = 3,
-       },
-};
-
 static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
 {
        /* TODO allow them to set a preferred stripe size */
@@ -4914,9 +5183,7 @@ static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
                 * and the stripes
                 */
                sizeof(u64) * (total_stripes),
-               GFP_NOFS);
-       if (!bbio)
-               return NULL;
+               GFP_NOFS|__GFP_NOFAIL);
 
        atomic_set(&bbio->error, 0);
        atomic_set(&bbio->refs, 1);
@@ -5584,26 +5851,26 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        return 0;
 }
 
-static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err)
+static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
 {
-       if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED))
-               bio_endio_nodec(bio, err);
-       else
-               bio_endio(bio, err);
+       bio->bi_private = bbio->private;
+       bio->bi_end_io = bbio->end_io;
+       bio_endio(bio);
+
        btrfs_put_bbio(bbio);
 }
 
-static void btrfs_end_bio(struct bio *bio, int err)
+static void btrfs_end_bio(struct bio *bio)
 {
        struct btrfs_bio *bbio = bio->bi_private;
-       struct btrfs_device *dev = bbio->stripes[0].dev;
        int is_orig_bio = 0;
 
-       if (err) {
+       if (bio->bi_error) {
                atomic_inc(&bbio->error);
-               if (err == -EIO || err == -EREMOTEIO) {
+               if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) {
                        unsigned int stripe_index =
                                btrfs_io_bio(bio)->stripe_index;
+                       struct btrfs_device *dev;
 
                        BUG_ON(stripe_index >= bbio->num_stripes);
                        dev = bbio->stripes[stripe_index].dev;
@@ -5633,24 +5900,21 @@ static void btrfs_end_bio(struct bio *bio, int err)
                        bio = bbio->orig_bio;
                }
 
-               bio->bi_private = bbio->private;
-               bio->bi_end_io = bbio->end_io;
                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                /* only send an error to the higher layers if it is
                 * beyond the tolerance of the btrfs bio
                 */
                if (atomic_read(&bbio->error) > bbio->max_errors) {
-                       err = -EIO;
+                       bio->bi_error = -EIO;
                } else {
                        /*
                         * this bio is actually up to date, we didn't
                         * go over the max number of errors
                         */
-                       set_bit(BIO_UPTODATE, &bio->bi_flags);
-                       err = 0;
+                       bio->bi_error = 0;
                }
 
-               btrfs_end_bbio(bbio, bio, err);
+               btrfs_end_bbio(bbio, bio);
        } else if (!is_orig_bio) {
                bio_put(bio);
        }
@@ -5671,7 +5935,7 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root,
        struct btrfs_pending_bios *pending_bios;
 
        if (device->missing || !device->bdev) {
-               bio_endio(bio, -EIO);
+               bio_io_error(bio);
                return;
        }
 
@@ -5716,34 +5980,6 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root,
                                 &device->work);
 }
 
-static int bio_size_ok(struct block_device *bdev, struct bio *bio,
-                      sector_t sector)
-{
-       struct bio_vec *prev;
-       struct request_queue *q = bdev_get_queue(bdev);
-       unsigned int max_sectors = queue_max_sectors(q);
-       struct bvec_merge_data bvm = {
-               .bi_bdev = bdev,
-               .bi_sector = sector,
-               .bi_rw = bio->bi_rw,
-       };
-
-       if (WARN_ON(bio->bi_vcnt == 0))
-               return 1;
-
-       prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
-       if (bio_sectors(bio) > max_sectors)
-               return 0;
-
-       if (!q->merge_bvec_fn)
-               return 1;
-
-       bvm.bi_size = bio->bi_iter.bi_size - prev->bv_len;
-       if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len)
-               return 0;
-       return 1;
-}
-
 static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
                              struct bio *bio, u64 physical, int dev_nr,
                              int rw, int async)
@@ -5777,38 +6013,6 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
                btrfsic_submit_bio(rw, bio);
 }
 
-static int breakup_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
-                             struct bio *first_bio, struct btrfs_device *dev,
-                             int dev_nr, int rw, int async)
-{
-       struct bio_vec *bvec = first_bio->bi_io_vec;
-       struct bio *bio;
-       int nr_vecs = bio_get_nr_vecs(dev->bdev);
-       u64 physical = bbio->stripes[dev_nr].physical;
-
-again:
-       bio = btrfs_bio_alloc(dev->bdev, physical >> 9, nr_vecs, GFP_NOFS);
-       if (!bio)
-               return -ENOMEM;
-
-       while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) {
-               if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
-                                bvec->bv_offset) < bvec->bv_len) {
-                       u64 len = bio->bi_iter.bi_size;
-
-                       atomic_inc(&bbio->stripes_pending);
-                       submit_stripe_bio(root, bbio, bio, physical, dev_nr,
-                                         rw, async);
-                       physical += len;
-                       goto again;
-               }
-               bvec++;
-       }
-
-       submit_stripe_bio(root, bbio, bio, physical, dev_nr, rw, async);
-       return 0;
-}
-
 static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
 {
        atomic_inc(&bbio->error);
@@ -5816,12 +6020,10 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
                /* Shoud be the original bio. */
                WARN_ON(bio != bbio->orig_bio);
 
-               bio->bi_private = bbio->private;
-               bio->bi_end_io = bbio->end_io;
                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                bio->bi_iter.bi_sector = logical >> 9;
-
-               btrfs_end_bbio(bbio, bio, -EIO);
+               bio->bi_error = -EIO;
+               btrfs_end_bbio(bbio, bio);
        }
 }
 
@@ -5883,25 +6085,11 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                        continue;
                }
 
-               /*
-                * Check and see if we're ok with this bio based on it's size
-                * and offset with the given device.
-                */
-               if (!bio_size_ok(dev->bdev, first_bio,
-                                bbio->stripes[dev_nr].physical >> 9)) {
-                       ret = breakup_stripe_bio(root, bbio, first_bio, dev,
-                                                dev_nr, rw, async_submit);
-                       BUG_ON(ret);
-                       continue;
-               }
-
                if (dev_nr < total_devs - 1) {
                        bio = btrfs_bio_clone(first_bio, GFP_NOFS);
                        BUG_ON(!bio); /* -ENOMEM */
-               } else {
+               } else
                        bio = first_bio;
-                       bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED;
-               }
 
                submit_stripe_bio(root, bbio, bio,
                                  bbio->stripes[dev_nr].physical, dev_nr, rw,
@@ -6078,6 +6266,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                                free_extent_map(em);
                                return -EIO;
                        }
+                       btrfs_warn(root->fs_info, "devid %llu uuid %pU is missing",
+                                               devid, uuid);
                }
                map->stripes[i].dev->in_fs_metadata = 1;
        }
@@ -6197,10 +6387,11 @@ static int read_one_dev(struct btrfs_root *root,
                if (!btrfs_test_opt(root, DEGRADED))
                        return -EIO;
 
-               btrfs_warn(root->fs_info, "devid %llu missing", devid);
                device = add_missing_dev(root, fs_devices, devid, dev_uuid);
                if (!device)
                        return -ENOMEM;
+               btrfs_warn(root->fs_info, "devid %llu uuid %pU missing",
+                               devid, dev_uuid);
        } else {
                if (!device->bdev && !btrfs_test_opt(root, DEGRADED))
                        return -EIO;
@@ -6322,6 +6513,14 @@ int btrfs_read_sys_array(struct btrfs_root *root)
                                goto out_short_read;
 
                        num_stripes = btrfs_chunk_num_stripes(sb, chunk);
+                       if (!num_stripes) {
+                               printk(KERN_ERR
+           "BTRFS: invalid number of stripes %u in sys_array at offset %u\n",
+                                       num_stripes, cur_offset);
+                               ret = -EIO;
+                               break;
+                       }
+
                        len = btrfs_chunk_item_size(num_stripes);
                        if (cur_offset + len > array_size)
                                goto out_short_read;
@@ -6517,8 +6716,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
        BUG_ON(!path);
        ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
        if (ret < 0) {
-               printk_in_rcu(KERN_WARNING "BTRFS: "
-                       "error %d while searching for dev_stats item for device %s!\n",
+               btrfs_warn_in_rcu(dev_root->fs_info,
+                       "error %d while searching for dev_stats item for device %s",
                              ret, rcu_str_deref(device->name));
                goto out;
        }
@@ -6528,8 +6727,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
                /* need to delete old one and insert a new one */
                ret = btrfs_del_item(trans, dev_root, path);
                if (ret != 0) {
-                       printk_in_rcu(KERN_WARNING "BTRFS: "
-                               "delete too small dev_stats item for device %s failed %d!\n",
+                       btrfs_warn_in_rcu(dev_root->fs_info,
+                               "delete too small dev_stats item for device %s failed %d",
                                      rcu_str_deref(device->name), ret);
                        goto out;
                }
@@ -6542,9 +6741,9 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
                ret = btrfs_insert_empty_item(trans, dev_root, path,
                                              &key, sizeof(*ptr));
                if (ret < 0) {
-                       printk_in_rcu(KERN_WARNING "BTRFS: "
-                                         "insert dev_stats item for device %s failed %d!\n",
-                                     rcu_str_deref(device->name), ret);
+                       btrfs_warn_in_rcu(dev_root->fs_info,
+                               "insert dev_stats item for device %s failed %d",
+                               rcu_str_deref(device->name), ret);
                        goto out;
                }
        }
@@ -6598,8 +6797,8 @@ static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
 {
        if (!dev->dev_stats_valid)
                return;
-       printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
-                          "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
+       btrfs_err_rl_in_rcu(dev->dev_root->fs_info,
+               "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
                           rcu_str_deref(dev->name),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
@@ -6618,8 +6817,8 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
        if (i == BTRFS_DEV_STAT_VALUES_MAX)
                return; /* all values == 0, suppress message */
 
-       printk_in_rcu(KERN_INFO "BTRFS: "
-                  "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
+       btrfs_info_in_rcu(dev->dev_root->fs_info,
+               "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
               rcu_str_deref(dev->name),
               btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
               btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
@@ -6663,22 +6862,34 @@ int btrfs_get_dev_stats(struct btrfs_root *root,
        return 0;
 }
 
-int btrfs_scratch_superblock(struct btrfs_device *device)
+void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path)
 {
        struct buffer_head *bh;
        struct btrfs_super_block *disk_super;
+       int copy_num;
 
-       bh = btrfs_read_dev_super(device->bdev);
-       if (!bh)
-               return -EINVAL;
-       disk_super = (struct btrfs_super_block *)bh->b_data;
+       if (!bdev)
+               return;
 
-       memset(&disk_super->magic, 0, sizeof(disk_super->magic));
-       set_buffer_dirty(bh);
-       sync_dirty_buffer(bh);
-       brelse(bh);
+       for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX;
+               copy_num++) {
 
-       return 0;
+               if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
+                       continue;
+
+               disk_super = (struct btrfs_super_block *)bh->b_data;
+
+               memset(&disk_super->magic, 0, sizeof(disk_super->magic));
+               set_buffer_dirty(bh);
+               sync_dirty_buffer(bh);
+               brelse(bh);
+       }
+
+       /* Notify udev that device has changed */
+       btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
+
+       /* Update ctime/mtime for device path for libblkid */
+       update_dev_time(device_path);
 }
 
 /*
@@ -6728,3 +6939,56 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
        }
        unlock_chunks(root);
 }
+
+void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       while (fs_devices) {
+               fs_devices->fs_info = fs_info;
+               fs_devices = fs_devices->seed;
+       }
+}
+
+void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       while (fs_devices) {
+               fs_devices->fs_info = NULL;
+               fs_devices = fs_devices->seed;
+       }
+}
+
+void btrfs_close_one_device(struct btrfs_device *device)
+{
+       struct btrfs_fs_devices *fs_devices = device->fs_devices;
+       struct btrfs_device *new_device;
+       struct rcu_string *name;
+
+       if (device->bdev)
+               fs_devices->open_devices--;
+
+       if (device->writeable &&
+           device->devid != BTRFS_DEV_REPLACE_DEVID) {
+               list_del_init(&device->dev_alloc_list);
+               fs_devices->rw_devices--;
+       }
+
+       if (device->missing)
+               fs_devices->missing_devices--;
+
+       new_device = btrfs_alloc_device(NULL, &device->devid,
+                                       device->uuid);
+       BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
+
+       /* Safe because we are under uuid_mutex */
+       if (device->name) {
+               name = rcu_string_strdup(device->name->str, GFP_NOFS);
+               BUG_ON(!name); /* -ENOMEM */
+               rcu_assign_pointer(new_device->name, name);
+       }
+
+       list_replace_rcu(&device->dev_list, &new_device->dev_list);
+       new_device->fs_devices = device->fs_devices;
+
+       call_rcu(&device->rcu, free_device);
+}