These changes are the raw update of the kernel sources to linux-4.4.6-rt14 (kvmfornfv.git).
diff --git a/kernel/fs/btrfs/backref.c b/kernel/fs/btrfs/backref.c
index 614aaa1..e2f659d 100644
--- a/kernel/fs/btrfs/backref.c
+++ b/kernel/fs/btrfs/backref.c
@@ -206,10 +206,33 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
                return -ENOMEM;
 
        ref->root_id = root_id;
-       if (key)
+       if (key) {
                ref->key_for_search = *key;
-       else
+               /*
+                * We can often find data backrefs with an offset that is too
+                * large (>= LLONG_MAX, maximum allowed file offset) due to
+                * underflows when subtracting the data offset of the
+                * corresponding extent data item from a file's offset. This
+                * can happen for example in the clone ioctl.
+                * So if we detect such a case we set the search key's offset
+                * to zero to make sure we will find the matching file extent
+                * item at add_all_parents(), otherwise we would miss it
+                * because the offset taken from the backref is much larger
+                * than the offset of the file extent item. This can make us
+                * scan a very large number of file extent items, but at
+                * least it will not make us miss any.
+                * This is an ugly workaround for a behaviour that should
+                * never have existed, but it does, and a fix in the clone
+                * ioctl would touch a lot of places, cause backwards
+                * incompatibility and would not fix the problem for extents
+                * cloned with older kernels.
+                */
+               if (ref->key_for_search.type == BTRFS_EXTENT_DATA_KEY &&
+                   ref->key_for_search.offset >= LLONG_MAX)
+                       ref->key_for_search.offset = 0;
+       } else {
                memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
+       }
 
        ref->inode_list = NULL;
        ref->level = level;
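To see the underflow this guards against: the clone ioctl computes a backref offset as the file extent item's key offset minus the extent's data offset, and that u64 subtraction can wrap. A standalone sketch with hypothetical values (illustration only, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		/* hypothetical: cloned extent at file offset 0 whose
		 * extent data item carries an 8 KiB data offset */
		unsigned long long key_offset = 0;
		unsigned long long data_offset = 8192;

		/* the u64 subtraction underflows and wraps around */
		unsigned long long backref_offset = key_offset - data_offset;

		/* prints 18446744073709543424, well above LLONG_MAX, which
		 * is why the patch clamps the search key offset to zero */
		printf("backref offset: %llu\n", backref_offset);
		return 0;
	}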
@@ -250,8 +273,12 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
         * the first item to check. But sometimes, we may enter it with
         * slot==nritems. In that case, go to the next leaf before we continue.
         */
-       if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
-               ret = btrfs_next_old_leaf(root, path, time_seq);
+       if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+               if (time_seq == (u64)-1)
+                       ret = btrfs_next_leaf(root, path);
+               else
+                       ret = btrfs_next_old_leaf(root, path, time_seq);
+       }
 
        while (!ret && count < total_refs) {
                eb = path->nodes[0];
@@ -291,7 +318,10 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
                        eie = NULL;
                }
 next:
-               ret = btrfs_next_old_item(root, path, time_seq);
+               if (time_seq == (u64)-1)
+                       ret = btrfs_next_item(root, path);
+               else
+                       ret = btrfs_next_old_item(root, path, time_seq);
        }
 
        if (ret > 0)
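The (u64)-1 check now appears before both the leaf and the item iterators in add_all_parents(). If one wanted to avoid repeating it, a small helper could centralize the dispatch; a hypothetical refactor sketch (not part of the patch, using the existing btrfs_next_item()/btrfs_next_old_item() helpers):

	/* hypothetical: pick live iteration vs. tree mod log replay */
	static int next_item_seq(struct btrfs_root *root,
				 struct btrfs_path *path, u64 time_seq)
	{
		if (time_seq == (u64)-1)
			return btrfs_next_item(root, path);
		return btrfs_next_old_item(root, path, time_seq);
	}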
@@ -325,15 +355,23 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 
        index = srcu_read_lock(&fs_info->subvol_srcu);
 
-       root = btrfs_read_fs_root_no_name(fs_info, &root_key);
+       root = btrfs_get_fs_root(fs_info, &root_key, false);
        if (IS_ERR(root)) {
                srcu_read_unlock(&fs_info->subvol_srcu, index);
                ret = PTR_ERR(root);
                goto out;
        }
 
+       if (btrfs_test_is_dummy_root(root)) {
+               srcu_read_unlock(&fs_info->subvol_srcu, index);
+               ret = -ENOENT;
+               goto out;
+       }
+
        if (path->search_commit_root)
                root_level = btrfs_header_level(root->commit_root);
+       else if (time_seq == (u64)-1)
+               root_level = btrfs_header_level(root->node);
        else
                root_level = btrfs_old_root_level(root, time_seq);
 
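For context: btrfs_read_fs_root_no_name() is a thin wrapper that calls btrfs_get_fs_root() with check_ref set to true, so switching to btrfs_get_fs_root(fs_info, &root_key, false) simply stops rejecting roots whose root_item ref count is zero (e.g. subvolumes in the middle of deletion). The wrapper, as defined in fs/btrfs/disk-io.h around this kernel version:

	static inline struct btrfs_root *
	btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
				   struct btrfs_key *location)
	{
		return btrfs_get_fs_root(fs_info, location, true);
	}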
@@ -343,7 +381,12 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
        }
 
        path->lowest_level = level;
-       ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
+       if (time_seq == (u64)-1)
+               ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
+                                       0, 0);
+       else
+               ret = btrfs_search_old_slot(root, &ref->key_for_search, path,
+                                           time_seq);
 
        /* root node has been locked, we can release @subvol_srcu safely here */
        srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -491,7 +534,9 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
                BUG_ON(!ref->wanted_disk_byte);
                eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
                                     0);
-               if (!eb || !extent_buffer_uptodate(eb)) {
+               if (IS_ERR(eb)) {
+                       return PTR_ERR(eb);
+               } else if (!extent_buffer_uptodate(eb)) {
                        free_extent_buffer(eb);
                        return -EIO;
                }
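In this kernel, read_tree_block() reports failures as an ERR_PTR-encoded errno rather than NULL, so callers must distinguish "read failed" (IS_ERR) from "read completed but not uptodate". For readers unfamiliar with the convention, a minimal userspace model of the ERR_PTR helpers (illustrative re-implementation, not the kernel headers):

	#include <stdio.h>

	#define MAX_ERRNO 4095

	static inline void *ERR_PTR(long error) { return (void *)error; }
	static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
	static inline int IS_ERR(const void *ptr)
	{
		/* errno values live in the top 4095 addresses */
		return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
	}

	int main(void)
	{
		void *eb = ERR_PTR(-5);	/* pretend the read failed with -EIO */

		if (IS_ERR(eb))
			printf("read_tree_block failed: %ld\n", PTR_ERR(eb));
		return 0;
	}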
@@ -507,7 +552,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
 }
 
 /*
- * merge two lists of backrefs and adjust counts accordingly
+ * merge backrefs and adjust counts accordingly
  *
  * mode = 1: merge identical keys, if key is set
  *    FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
@@ -535,9 +580,9 @@ static void __merge_refs(struct list_head *head, int mode)
 
                        ref2 = list_entry(pos2, struct __prelim_ref, list);
 
+                       if (!ref_for_same_block(ref1, ref2))
+                               continue;
                        if (mode == 1) {
-                               if (!ref_for_same_block(ref1, ref2))
-                                       continue;
                                if (!ref1->parent && ref2->parent) {
                                        xchg = ref1;
                                        ref1 = ref2;
@@ -572,8 +617,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                              struct list_head *prefs, u64 *total_refs,
                              u64 inum)
 {
+       struct btrfs_delayed_ref_node *node;
        struct btrfs_delayed_extent_op *extent_op = head->extent_op;
-       struct rb_node *n = &head->node.rb_node;
        struct btrfs_key key;
        struct btrfs_key op_key = {0};
        int sgn;
@@ -583,12 +628,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
 
        spin_lock(&head->lock);
-       n = rb_first(&head->ref_root);
-       while (n) {
-               struct btrfs_delayed_ref_node *node;
-               node = rb_entry(n, struct btrfs_delayed_ref_node,
-                               rb_node);
-               n = rb_next(n);
+       list_for_each_entry(node, &head->ref_list, list) {
                if (node->seq > seq)
                        continue;
 
@@ -621,7 +661,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                        struct btrfs_delayed_tree_ref *ref;
 
                        ref = btrfs_delayed_node_to_tree_ref(node);
-                       ret = __add_prelim_ref(prefs, ref->root, NULL,
+                       ret = __add_prelim_ref(prefs, 0, NULL,
                                               ref->level + 1, ref->parent,
                                               node->bytenr,
                                               node->ref_mod * sgn, GFP_ATOMIC);
@@ -653,11 +693,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                        struct btrfs_delayed_data_ref *ref;
 
                        ref = btrfs_delayed_node_to_data_ref(node);
-
-                       key.objectid = ref->objectid;
-                       key.type = BTRFS_EXTENT_DATA_KEY;
-                       key.offset = ref->offset;
-                       ret = __add_prelim_ref(prefs, ref->root, &key, 0,
+                       ret = __add_prelim_ref(prefs, 0, NULL, 0,
                                               ref->parent, node->bytenr,
                                               node->ref_mod * sgn, GFP_ATOMIC);
                        break;
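Since delayed refs for a head are now kept on head->ref_list rather than the rb-tree head->ref_root, the walk becomes a plain intrusive-list iteration. A self-contained userspace model of the list_for_each_entry() pattern used above (simplified macros for illustration; the real ones live in include/linux/list.h):

	#include <stdio.h>
	#include <stddef.h>

	struct list_head { struct list_head *next, *prev; };

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	#define list_for_each_entry(pos, head, member)				\
		for (pos = container_of((head)->next, typeof(*pos), member);	\
		     &pos->member != (head);					\
		     pos = container_of(pos->member.next, typeof(*pos), member))

	struct delayed_ref { unsigned long long seq; struct list_head list; };

	static void list_add_tail(struct list_head *entry, struct list_head *head)
	{
		entry->prev = head->prev;
		entry->next = head;
		head->prev->next = entry;
		head->prev = entry;
	}

	int main(void)
	{
		struct list_head ref_list = { &ref_list, &ref_list };
		struct delayed_ref a = { .seq = 1 }, b = { .seq = 2 }, *node;

		list_add_tail(&a.list, &ref_list);
		list_add_tail(&b.list, &ref_list);

		/* same shape as the new loop in __add_delayed_refs() */
		list_for_each_entry(node, &ref_list, list)
			printf("seq=%llu\n", node->seq);
		return 0;
	}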
@@ -882,6 +918,11 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
  *
  * NOTE: This can return values > 0
  *
+ * If time_seq is set to (u64)-1, it will not search delayed_refs, and will
+ * behave much like the trans == NULL case; the only difference is that it
+ * does not search the commit root.
+ * This special case exists for qgroup to search roots in commit_transaction().
+ *
  * FIXME some caching might speed things up
  */
 static int find_parent_nodes(struct btrfs_trans_handle *trans,
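A sketch of the expected qgroup call site for this special case, assuming the btrfs_find_all_roots() wrapper around find_parent_nodes() keeps its usual signature:

	/*
	 * Inside commit_transaction(), qgroup accounting passes (u64)-1 to
	 * skip delayed refs and tree mod log replay while still reading the
	 * current roots instead of the commit roots.
	 */
	ret = btrfs_find_all_roots(trans, fs_info, bytenr, (u64)-1, &roots);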
@@ -920,6 +961,9 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
                path->skip_locking = 1;
        }
 
+       if (time_seq == (u64)-1)
+               path->skip_locking = 1;
+
        /*
         * grab both a lock on the path and a lock on the delayed ref head.
         * We need both to get a consistent picture of how the refs look
@@ -934,9 +978,10 @@ again:
        BUG_ON(ret == 0);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-       if (trans && likely(trans->type != __TRANS_DUMMY)) {
+       if (trans && likely(trans->type != __TRANS_DUMMY) &&
+           time_seq != (u64)-1) {
 #else
-       if (trans) {
+       if (trans && time_seq != (u64)-1) {
 #endif
                /*
                 * look if there are updates for this ref queued and lock the
@@ -1034,7 +1079,10 @@ again:
 
                                eb = read_tree_block(fs_info->extent_root,
                                                           ref->parent, 0);
-                               if (!eb || !extent_buffer_uptodate(eb)) {
+                               if (IS_ERR(eb)) {
+                                       ret = PTR_ERR(eb);
+                                       goto out;
+                               } else if (!extent_buffer_uptodate(eb)) {
                                        free_extent_buffer(eb);
                                        ret = -EIO;
                                        goto out;
@@ -1369,7 +1417,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                        read_extent_buffer(eb, dest + bytes_left,
                                           name_off, name_len);
                if (eb != eb_in) {
-                       btrfs_tree_read_unlock_blocking(eb);
+                       if (!path->skip_locking)
+                               btrfs_tree_read_unlock_blocking(eb);
                        free_extent_buffer(eb);
                }
                ret = btrfs_find_item(fs_root, path, parent, 0,
@@ -1389,9 +1438,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                eb = path->nodes[0];
                /* make sure we can use eb after releasing the path */
                if (eb != eb_in) {
-                       atomic_inc(&eb->refs);
-                       btrfs_tree_read_lock(eb);
-                       btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+                       if (!path->skip_locking)
+                               btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+                       path->nodes[0] = NULL;
+                       path->locks[0] = 0;
                }
                btrfs_release_path(path);
                iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
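The rework above also transfers ownership of eb out of the path before btrfs_release_path() runs; otherwise the release would unlock and unref the very buffer the function still reads from. An annotated restatement of the new block (comments are interpretation, not patch content):

	if (eb != eb_in) {
		if (!path->skip_locking)
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
		/* detach eb so btrfs_release_path() below neither
		 * unlocks it nor drops the reference we still need */
		path->nodes[0] = NULL;
		path->locks[0] = 0;
	}
	btrfs_release_path(path);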
@@ -1786,7 +1836,6 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
        int found = 0;
        struct extent_buffer *eb;
        struct btrfs_inode_extref *extref;
-       struct extent_buffer *leaf;
        u32 item_size;
        u32 cur_offset;
        unsigned long ptr;
@@ -1814,9 +1863,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
                btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
                btrfs_release_path(path);
 
-               leaf = path->nodes[0];
-               item_size = btrfs_item_size_nr(leaf, slot);
-               ptr = btrfs_item_ptr_offset(leaf, slot);
+               item_size = btrfs_item_size_nr(eb, slot);
+               ptr = btrfs_item_ptr_offset(eb, slot);
                cur_offset = 0;
 
                while (cur_offset < item_size) {
@@ -1830,7 +1878,7 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
                        if (ret)
                                break;
 
-                       cur_offset += btrfs_inode_extref_name_len(leaf, extref);
+                       cur_offset += btrfs_inode_extref_name_len(eb, extref);
                        cur_offset += sizeof(*extref);
                }
                btrfs_tree_read_unlock_blocking(eb);
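The leaf -> eb substitution fixes a use-after-release: leaf aliased path->nodes[0], which is no longer valid once btrfs_release_path() has run, while eb holds its own reference. Condensed from the surrounding code in iterate_inode_extrefs() (the atomic_inc follows the same pattern as iterate_inode_refs()):

	eb = path->nodes[0];
	atomic_inc(&eb->refs);		/* keep eb alive past the release */
	btrfs_tree_read_lock(eb);
	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
	btrfs_release_path(path);	/* path->nodes[0] unusable from here */

	item_size = btrfs_item_size_nr(eb, slot);	/* safe: goes through eb */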