diff --git a/kernel/fs/btrfs/backref.c b/kernel/fs/btrfs/backref.c
index 614aaa196..e2f659dc5 100644
--- a/kernel/fs/btrfs/backref.c
+++ b/kernel/fs/btrfs/backref.c
@@ -206,10 +206,33 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 		return -ENOMEM;
 
 	ref->root_id = root_id;
-	if (key)
+	if (key) {
 		ref->key_for_search = *key;
-	else
+		/*
+		 * We can often find data backrefs with an offset that is too
+		 * large (>= LLONG_MAX, maximum allowed file offset) due to
+		 * underflows when subtracting a file's offset with the data
+		 * offset of its corresponding extent data item. This can
+		 * happen for example in the clone ioctl.
+		 * So if we detect such a case we set the search key's offset to
+		 * zero to make sure we will find the matching file extent item
+		 * at add_all_parents(), otherwise we will miss it because the
+		 * offset taken from the backref is much larger than the offset
+		 * of the file extent item. This can make us scan a very large
+		 * number of file extent items, but at least it will not make
+		 * us miss any.
+		 * This is an ugly workaround for a behaviour that should have
+		 * never existed, but it does and a fix for the clone ioctl
+		 * would touch a lot of places, cause backwards incompatibility
+		 * and would not fix the problem for extents cloned with older
+		 * kernels.
+		 */
+		if (ref->key_for_search.type == BTRFS_EXTENT_DATA_KEY &&
+		    ref->key_for_search.offset >= LLONG_MAX)
+			ref->key_for_search.offset = 0;
+	} else {
 		memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
+	}
 
 	ref->inode_list = NULL;
 	ref->level = level;
@@ -250,8 +273,12 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 	 * the first item to check. But sometimes, we may enter it with
 	 * slot==nritems. In that case, go to the next leaf before we continue.
 	 */
-	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
-		ret = btrfs_next_old_leaf(root, path, time_seq);
+	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+		if (time_seq == (u64)-1)
+			ret = btrfs_next_leaf(root, path);
+		else
+			ret = btrfs_next_old_leaf(root, path, time_seq);
+	}
 
 	while (!ret && count < total_refs) {
 		eb = path->nodes[0];
@@ -291,7 +318,10 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 			eie = NULL;
 		}
 next:
-		ret = btrfs_next_old_item(root, path, time_seq);
+		if (time_seq == (u64)-1)
+			ret = btrfs_next_item(root, path);
+		else
+			ret = btrfs_next_old_item(root, path, time_seq);
 	}
 
 	if (ret > 0)
@@ -325,15 +355,23 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 
 	index = srcu_read_lock(&fs_info->subvol_srcu);
 
-	root = btrfs_read_fs_root_no_name(fs_info, &root_key);
+	root = btrfs_get_fs_root(fs_info, &root_key, false);
 	if (IS_ERR(root)) {
 		srcu_read_unlock(&fs_info->subvol_srcu, index);
 		ret = PTR_ERR(root);
 		goto out;
 	}
 
+	if (btrfs_test_is_dummy_root(root)) {
+		srcu_read_unlock(&fs_info->subvol_srcu, index);
+		ret = -ENOENT;
+		goto out;
+	}
+
 	if (path->search_commit_root)
 		root_level = btrfs_header_level(root->commit_root);
+	else if (time_seq == (u64)-1)
+		root_level = btrfs_header_level(root->node);
 	else
 		root_level = btrfs_old_root_level(root, time_seq);
 
@@ -343,7 +381,12 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	}
 
 	path->lowest_level = level;
-	ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
+	if (time_seq == (u64)-1)
+		ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
+					0, 0);
+	else
+		ret = btrfs_search_old_slot(root, &ref->key_for_search, path,
+					    time_seq);
 
 	/* root node has been locked, we can release @subvol_srcu safely here */
 	srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -491,7 +534,9 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
 		BUG_ON(!ref->wanted_disk_byte);
 		eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
 				     0);
-		if (!eb || !extent_buffer_uptodate(eb)) {
+		if (IS_ERR(eb)) {
+			return PTR_ERR(eb);
+		} else if (!extent_buffer_uptodate(eb)) {
 			free_extent_buffer(eb);
 			return -EIO;
 		}
@@ -507,7 +552,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
 }
 
 /*
- * merge two lists of backrefs and adjust counts accordingly
+ * merge backrefs and adjust counts accordingly
  *
  * mode = 1: merge identical keys, if key is set
  *           FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
@@ -535,9 +580,9 @@ static void __merge_refs(struct list_head *head, int mode)
 
 			ref2 = list_entry(pos2, struct __prelim_ref, list);
 
+			if (!ref_for_same_block(ref1, ref2))
+				continue;
 			if (mode == 1) {
-				if (!ref_for_same_block(ref1, ref2))
-					continue;
 				if (!ref1->parent && ref2->parent) {
 					xchg = ref1;
 					ref1 = ref2;
@@ -572,8 +617,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 			      struct list_head *prefs, u64 *total_refs,
 			      u64 inum)
 {
+	struct btrfs_delayed_ref_node *node;
 	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
-	struct rb_node *n = &head->node.rb_node;
 	struct btrfs_key key;
 	struct btrfs_key op_key = {0};
 	int sgn;
@@ -583,12 +628,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 		btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
 
 	spin_lock(&head->lock);
-	n = rb_first(&head->ref_root);
-	while (n) {
-		struct btrfs_delayed_ref_node *node;
-		node = rb_entry(n, struct btrfs_delayed_ref_node,
-				rb_node);
-		n = rb_next(n);
+	list_for_each_entry(node, &head->ref_list, list) {
 		if (node->seq > seq)
 			continue;
 
@@ -621,7 +661,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 			struct btrfs_delayed_tree_ref *ref;
 
 			ref = btrfs_delayed_node_to_tree_ref(node);
-			ret = __add_prelim_ref(prefs, ref->root, NULL,
+			ret = __add_prelim_ref(prefs, 0, NULL,
 					       ref->level + 1, ref->parent,
 					       node->bytenr,
 					       node->ref_mod * sgn, GFP_ATOMIC);
@@ -653,11 +693,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 			struct btrfs_delayed_data_ref *ref;
 
 			ref = btrfs_delayed_node_to_data_ref(node);
-
-			key.objectid = ref->objectid;
-			key.type = BTRFS_EXTENT_DATA_KEY;
-			key.offset = ref->offset;
-			ret = __add_prelim_ref(prefs, ref->root, &key, 0,
+			ret = __add_prelim_ref(prefs, 0, NULL, 0,
 					       ref->parent, node->bytenr,
 					       node->ref_mod * sgn, GFP_ATOMIC);
 			break;
@@ -882,6 +918,11 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
  *
  * NOTE: This can return values > 0
  *
+ * If time_seq is set to (u64)-1, it will not search delayed_refs, and
+ * will behave much like the trans == NULL case; the only difference is
+ * that it will not use the commit root.
+ * The special case is for qgroup to search roots in commit_transaction().
+ *
  * FIXME some caching might speed things up
  */
 static int find_parent_nodes(struct btrfs_trans_handle *trans,
@@ -920,6 +961,9 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 		path->skip_locking = 1;
 	}
 
+	if (time_seq == (u64)-1)
+		path->skip_locking = 1;
+
 	/*
 	 * grab both a lock on the path and a lock on the delayed ref head.
 	 * We need both to get a consistent picture of how the refs look
@@ -934,9 +978,10 @@ again:
 	BUG_ON(ret == 0);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-	if (trans && likely(trans->type != __TRANS_DUMMY)) {
+	if (trans && likely(trans->type != __TRANS_DUMMY) &&
+	    time_seq != (u64)-1) {
 #else
-	if (trans) {
+	if (trans && time_seq != (u64)-1) {
 #endif
 		/*
 		 * look if there are updates for this ref queued and lock the
@@ -1034,7 +1079,10 @@ again:
 
 			eb = read_tree_block(fs_info->extent_root,
 					     ref->parent, 0);
-			if (!eb || !extent_buffer_uptodate(eb)) {
+			if (IS_ERR(eb)) {
+				ret = PTR_ERR(eb);
+				goto out;
+			} else if (!extent_buffer_uptodate(eb)) {
 				free_extent_buffer(eb);
 				ret = -EIO;
 				goto out;
@@ -1369,7 +1417,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 			read_extent_buffer(eb, dest + bytes_left,
 					   name_off, name_len);
 		if (eb != eb_in) {
-			btrfs_tree_read_unlock_blocking(eb);
+			if (!path->skip_locking)
+				btrfs_tree_read_unlock_blocking(eb);
 			free_extent_buffer(eb);
 		}
 		ret = btrfs_find_item(fs_root, path, parent, 0,
@@ -1389,9 +1438,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 		eb = path->nodes[0];
 		/* make sure we can use eb after releasing the path */
 		if (eb != eb_in) {
-			atomic_inc(&eb->refs);
-			btrfs_tree_read_lock(eb);
-			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+			if (!path->skip_locking)
+				btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+			path->nodes[0] = NULL;
+			path->locks[0] = 0;
 		}
 		btrfs_release_path(path);
 		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
@@ -1786,7 +1836,6 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
 	int found = 0;
 	struct extent_buffer *eb;
 	struct btrfs_inode_extref *extref;
-	struct extent_buffer *leaf;
 	u32 item_size;
 	u32 cur_offset;
 	unsigned long ptr;
@@ -1814,9 +1863,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
 		btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 		btrfs_release_path(path);
 
-		leaf = path->nodes[0];
-		item_size = btrfs_item_size_nr(leaf, slot);
-		ptr = btrfs_item_ptr_offset(leaf, slot);
+		item_size = btrfs_item_size_nr(eb, slot);
+		ptr = btrfs_item_ptr_offset(eb, slot);
 		cur_offset = 0;
 
 		while (cur_offset < item_size) {
@@ -1830,7 +1878,7 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
 			if (ret)
 				break;
 
-			cur_offset += btrfs_inode_extref_name_len(leaf, extref);
+			cur_offset += btrfs_inode_extref_name_len(eb, extref);
 			cur_offset += sizeof(*extref);
 		}
 		btrfs_tree_read_unlock_blocking(eb);
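
A note on the first hunk above: the LLONG_MAX clamp is easier to see with a
standalone sketch of the u64 underflow it guards against. This is a minimal
userspace illustration, not kernel code; the variable names and the 4 KiB
value are hypothetical.

	#include <stdio.h>
	#include <stdint.h>
	#include <limits.h>
	#include <inttypes.h>

	int main(void)
	{
		/* A cloned extent: the file extent item sits at file offset 0,
		 * but its data begins 4 KiB into the on-disk extent. */
		uint64_t file_offset = 0;
		uint64_t extent_data_offset = 4096;

		/* The backref offset is file_offset - extent_data_offset; when
		 * the data offset is larger, the unsigned subtraction wraps to
		 * a value far above LLONG_MAX. */
		uint64_t search_offset = file_offset - extent_data_offset;

		if (search_offset >= (uint64_t)LLONG_MAX) {
			/* Same fallback as the patch: search from offset 0 so
			 * the walk cannot miss the file extent item, at the
			 * cost of scanning more items. */
			search_offset = 0;
		}

		printf("search offset: %" PRIu64 "\n", search_offset);
		return 0;
	}

The repeated "time_seq == (u64)-1" tests all follow one convention: the
all-ones value is a sentinel meaning "walk the current tree state" (plain
btrfs_search_slot()/btrfs_next_item(), no tree mod log replay, no delayed
ref processing, no locking), which is what the qgroup code needs when it
searches roots from commit_transaction().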