X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;ds=sidebyside;f=kernel%2Ffs%2Fbtrfs%2Fdelayed-ref.c;h=e06dd75ad13f98999682c00128ae852e0a886e14;hb=e09b41010ba33a20a87472ee821fa407a5b8da36;hp=8f8ed7d20bac5f4e058fc693d0fdc24fde7abeda;hpb=f93b97fd65072de626c074dbe099a1fff05ce060;p=kvmfornfv.git diff --git a/kernel/fs/btrfs/delayed-ref.c b/kernel/fs/btrfs/delayed-ref.c index 8f8ed7d20..e06dd75ad 100644 --- a/kernel/fs/btrfs/delayed-ref.c +++ b/kernel/fs/btrfs/delayed-ref.c @@ -22,6 +22,7 @@ #include "ctree.h" #include "delayed-ref.h" #include "transaction.h" +#include "qgroup.h" struct kmem_cache *btrfs_delayed_ref_head_cachep; struct kmem_cache *btrfs_delayed_tree_ref_cachep; @@ -84,87 +85,6 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2, return 0; } -/* - * entries in the rb tree are ordered by the byte number of the extent, - * type of the delayed backrefs and content of delayed backrefs. - */ -static int comp_entry(struct btrfs_delayed_ref_node *ref2, - struct btrfs_delayed_ref_node *ref1, - bool compare_seq) -{ - if (ref1->bytenr < ref2->bytenr) - return -1; - if (ref1->bytenr > ref2->bytenr) - return 1; - if (ref1->is_head && ref2->is_head) - return 0; - if (ref2->is_head) - return -1; - if (ref1->is_head) - return 1; - if (ref1->type < ref2->type) - return -1; - if (ref1->type > ref2->type) - return 1; - if (ref1->no_quota > ref2->no_quota) - return 1; - if (ref1->no_quota < ref2->no_quota) - return -1; - /* merging of sequenced refs is not allowed */ - if (compare_seq) { - if (ref1->seq < ref2->seq) - return -1; - if (ref1->seq > ref2->seq) - return 1; - } - if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || - ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { - return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), - btrfs_delayed_node_to_tree_ref(ref1), - ref1->type); - } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY || - ref1->type == BTRFS_SHARED_DATA_REF_KEY) { - return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2), - btrfs_delayed_node_to_data_ref(ref1)); - } - BUG(); - return 0; -} - -/* - * insert a new ref into the rbtree. This returns any existing refs - * for the same (bytenr,parent) tuple, or NULL if the new node was properly - * inserted. - */ -static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, - struct rb_node *node) -{ - struct rb_node **p = &root->rb_node; - struct rb_node *parent_node = NULL; - struct btrfs_delayed_ref_node *entry; - struct btrfs_delayed_ref_node *ins; - int cmp; - - ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - while (*p) { - parent_node = *p; - entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, - rb_node); - - cmp = comp_entry(entry, ins, 1); - if (cmp < 0) - p = &(*p)->rb_left; - else if (cmp > 0) - p = &(*p)->rb_right; - else - return entry; - } - - rb_link_node(node, parent_node, p); - rb_insert_color(node, root); - return NULL; -} - /* insert a new ref to head ref rbtree */ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root, struct rb_node *node) @@ -268,7 +188,7 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, rb_erase(&head->href_node, &delayed_refs->href_root); } else { assert_spin_locked(&head->lock); - rb_erase(&ref->rb_node, &head->ref_root); + list_del(&ref->list); } ref->in_tree = 0; btrfs_put_delayed_ref(ref); @@ -277,36 +197,50 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, trans->delayed_ref_updates--; } -static int merge_ref(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_root *delayed_refs, - struct btrfs_delayed_ref_head *head, - struct btrfs_delayed_ref_node *ref, u64 seq) +static bool merge_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head, + struct btrfs_delayed_ref_node *ref, + u64 seq) { - struct rb_node *node; - int mod = 0; - int done = 0; + struct btrfs_delayed_ref_node *next; + bool done = false; + + next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, + list); + while (!done && &next->list != &head->ref_list) { + int mod; + struct btrfs_delayed_ref_node *next2; - node = rb_next(&ref->rb_node); - while (!done && node) { - struct btrfs_delayed_ref_node *next; + next2 = list_next_entry(next, list); + + if (next == ref) + goto next; - next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - node = rb_next(node); if (seq && next->seq >= seq) - break; - if (comp_entry(ref, next, 0)) - continue; + goto next; + + if (next->type != ref->type) + goto next; + + if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY || + ref->type == BTRFS_SHARED_BLOCK_REF_KEY) && + comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref), + btrfs_delayed_node_to_tree_ref(next), + ref->type)) + goto next; + if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY || + ref->type == BTRFS_SHARED_DATA_REF_KEY) && + comp_data_refs(btrfs_delayed_node_to_data_ref(ref), + btrfs_delayed_node_to_data_ref(next))) + goto next; if (ref->action == next->action) { mod = next->ref_mod; } else { if (ref->ref_mod < next->ref_mod) { - struct btrfs_delayed_ref_node *tmp; - - tmp = ref; - ref = next; - next = tmp; - done = 1; + swap(ref, next); + done = true; } mod = -next->ref_mod; } @@ -315,16 +249,18 @@ static int merge_ref(struct btrfs_trans_handle *trans, ref->ref_mod += mod; if (ref->ref_mod == 0) { drop_delayed_ref(trans, delayed_refs, head, ref); - done = 1; + done = true; } else { /* - * You can't have multiples of the same ref on a tree - * block. + * Can't have multiples of the same ref on a tree block. */ WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || ref->type == BTRFS_SHARED_BLOCK_REF_KEY); } +next: + next = next2; } + return done; } @@ -333,14 +269,15 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head) { - struct rb_node *node; + struct btrfs_delayed_ref_node *ref; u64 seq = 0; assert_spin_locked(&head->lock); - /* - * We don't have too much refs to merge in the case of delayed data - * refs. - */ + + if (list_empty(&head->ref_list)) + return; + + /* We don't have too many refs to merge for data. */ if (head->is_data) return; @@ -354,19 +291,22 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, } spin_unlock(&fs_info->tree_mod_seq_lock); - node = rb_first(&head->ref_root); - while (node) { - struct btrfs_delayed_ref_node *ref; - - ref = rb_entry(node, struct btrfs_delayed_ref_node, - rb_node); - /* We can't merge refs that are outside of our seq count */ + ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, + list); + while (&ref->list != &head->ref_list) { if (seq && ref->seq >= seq) - break; - if (merge_ref(trans, delayed_refs, head, ref, seq)) - node = rb_first(&head->ref_root); - else - node = rb_next(&ref->rb_node); + goto next; + + if (merge_ref(trans, delayed_refs, head, ref, seq)) { + if (list_empty(&head->ref_list)) + break; + ref = list_first_entry(&head->ref_list, + struct btrfs_delayed_ref_node, + list); + continue; + } +next: + ref = list_next_entry(ref, list); } } @@ -443,45 +383,70 @@ again: } /* - * helper function to update an extent delayed ref in the - * rbtree. existing and update must both have the same - * bytenr and parent + * Helper to insert the ref_node to the tail or merge with tail. * - * This may free existing if the update cancels out whatever - * operation it was doing. + * Return 0 for insert. + * Return >0 for merge. */ -static noinline void -update_existing_ref(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_root *delayed_refs, - struct btrfs_delayed_ref_head *head, - struct btrfs_delayed_ref_node *existing, - struct btrfs_delayed_ref_node *update) +static int +add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *root, + struct btrfs_delayed_ref_head *href, + struct btrfs_delayed_ref_node *ref) { - if (update->action != existing->action) { - /* - * this is effectively undoing either an add or a - * drop. We decrement the ref_mod, and if it goes - * down to zero we just delete the entry without - * every changing the extent allocation tree. - */ - existing->ref_mod--; - if (existing->ref_mod == 0) - drop_delayed_ref(trans, delayed_refs, head, existing); - else - WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || - existing->type == BTRFS_SHARED_BLOCK_REF_KEY); + struct btrfs_delayed_ref_node *exist; + int mod; + int ret = 0; + + spin_lock(&href->lock); + /* Check whether we can merge the tail node with ref */ + if (list_empty(&href->ref_list)) + goto add_tail; + exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node, + list); + /* No need to compare bytenr nor is_head */ + if (exist->type != ref->type || exist->seq != ref->seq) + goto add_tail; + + if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY || + exist->type == BTRFS_SHARED_BLOCK_REF_KEY) && + comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist), + btrfs_delayed_node_to_tree_ref(ref), + ref->type)) + goto add_tail; + if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY || + exist->type == BTRFS_SHARED_DATA_REF_KEY) && + comp_data_refs(btrfs_delayed_node_to_data_ref(exist), + btrfs_delayed_node_to_data_ref(ref))) + goto add_tail; + + /* Now we are sure we can merge */ + ret = 1; + if (exist->action == ref->action) { + mod = ref->ref_mod; } else { - WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || - existing->type == BTRFS_SHARED_BLOCK_REF_KEY); - /* - * the action on the existing ref matches - * the action on the ref we're trying to add. - * Bump the ref_mod by one so the backref that - * is eventually added/removed has the correct - * reference count - */ - existing->ref_mod += update->ref_mod; + /* Need to change action */ + if (exist->ref_mod < ref->ref_mod) { + exist->action = ref->action; + mod = -exist->ref_mod; + exist->ref_mod = ref->ref_mod; + } else + mod = -ref->ref_mod; } + exist->ref_mod += mod; + + /* remove existing tail if its ref_mod is zero */ + if (exist->ref_mod == 0) + drop_delayed_ref(trans, root, href, exist); + spin_unlock(&href->lock); + return ret; + +add_tail: + list_add_tail(&ref->list, &href->ref_list); + atomic_inc(&root->num_entries); + trans->delayed_ref_updates++; + spin_unlock(&href->lock); + return ret; } /* @@ -568,15 +533,21 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs, static noinline struct btrfs_delayed_ref_head * add_delayed_ref_head(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_node *ref, u64 bytenr, - u64 num_bytes, int action, int is_data) + struct btrfs_delayed_ref_node *ref, + struct btrfs_qgroup_extent_record *qrecord, + u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved, + int action, int is_data) { struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_head *head_ref = NULL; struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_qgroup_extent_record *qexisting; int count_mod = 1; int must_insert_reserved = 0; + /* If reserved is provided, it must be a data extent. */ + BUG_ON(!is_data && reserved); + /* * the head node stores the sum of all the mods, so dropping a ref * should drop the sum in the head node by one. @@ -618,9 +589,28 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, head_ref = btrfs_delayed_node_to_head(ref); head_ref->must_insert_reserved = must_insert_reserved; head_ref->is_data = is_data; - head_ref->ref_root = RB_ROOT; + INIT_LIST_HEAD(&head_ref->ref_list); head_ref->processing = 0; head_ref->total_ref_mod = count_mod; + head_ref->qgroup_reserved = 0; + head_ref->qgroup_ref_root = 0; + + /* Record qgroup extent info if provided */ + if (qrecord) { + if (ref_root && reserved) { + head_ref->qgroup_ref_root = ref_root; + head_ref->qgroup_reserved = reserved; + } + + qrecord->bytenr = bytenr; + qrecord->num_bytes = num_bytes; + qrecord->old_roots = NULL; + + qexisting = btrfs_qgroup_insert_dirty_extent(delayed_refs, + qrecord); + if (qexisting) + kfree(qrecord); + } spin_lock_init(&head_ref->lock); mutex_init(&head_ref->mutex); @@ -630,6 +620,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, existing = htree_insert(&delayed_refs->href_root, &head_ref->href_node); if (existing) { + WARN_ON(ref_root && reserved && existing->qgroup_ref_root + && existing->qgroup_reserved); update_existing_head_ref(delayed_refs, &existing->node, ref); /* * we've updated the existing ref, free the newly @@ -657,12 +649,12 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref, struct btrfs_delayed_ref_node *ref, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, int level, - int action, int no_quota) + int action) { - struct btrfs_delayed_ref_node *existing; struct btrfs_delayed_tree_ref *full_ref; struct btrfs_delayed_ref_root *delayed_refs; u64 seq = 0; + int ret; if (action == BTRFS_ADD_DELAYED_EXTENT) action = BTRFS_ADD_DELAYED_REF; @@ -679,7 +671,6 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref->action = action; ref->is_head = 0; ref->in_tree = 1; - ref->no_quota = no_quota; ref->seq = seq; full_ref = btrfs_delayed_node_to_tree_ref(ref); @@ -693,21 +684,14 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, trace_add_delayed_tree_ref(ref, full_ref, action); - spin_lock(&head_ref->lock); - existing = tree_insert(&head_ref->ref_root, &ref->rb_node); - if (existing) { - update_existing_ref(trans, delayed_refs, head_ref, existing, - ref); - /* - * we've updated the existing ref, free the newly - * allocated ref - */ + ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref); + + /* + * XXX: memory should be freed at the same level allocated. + * But bad practice is anywhere... Follow it now. Need cleanup. + */ + if (ret > 0) kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref); - } else { - atomic_inc(&delayed_refs->num_entries); - trans->delayed_ref_updates++; - } - spin_unlock(&head_ref->lock); } /* @@ -719,12 +703,12 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref, struct btrfs_delayed_ref_node *ref, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, - u64 offset, int action, int no_quota) + u64 offset, int action) { - struct btrfs_delayed_ref_node *existing; struct btrfs_delayed_data_ref *full_ref; struct btrfs_delayed_ref_root *delayed_refs; u64 seq = 0; + int ret; if (action == BTRFS_ADD_DELAYED_EXTENT) action = BTRFS_ADD_DELAYED_REF; @@ -742,7 +726,6 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, ref->action = action; ref->is_head = 0; ref->in_tree = 1; - ref->no_quota = no_quota; ref->seq = seq; full_ref = btrfs_delayed_node_to_data_ref(ref); @@ -758,21 +741,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, trace_add_delayed_data_ref(ref, full_ref, action); - spin_lock(&head_ref->lock); - existing = tree_insert(&head_ref->ref_root, &ref->rb_node); - if (existing) { - update_existing_ref(trans, delayed_refs, head_ref, existing, - ref); - /* - * we've updated the existing ref, free the newly - * allocated ref - */ + ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref); + + if (ret > 0) kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref); - } else { - atomic_inc(&delayed_refs->num_entries); - trans->delayed_ref_updates++; - } - spin_unlock(&head_ref->lock); } /* @@ -784,15 +756,12 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, int level, int action, - struct btrfs_delayed_extent_op *extent_op, - int no_quota) + struct btrfs_delayed_extent_op *extent_op) { struct btrfs_delayed_tree_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; - - if (!is_fstree(ref_root) || !fs_info->quota_enabled) - no_quota = 0; + struct btrfs_qgroup_extent_record *record = NULL; BUG_ON(extent_op && extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); @@ -800,9 +769,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, return -ENOMEM; head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); - if (!head_ref) { - kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); - return -ENOMEM; + if (!head_ref) + goto free_ref; + + if (fs_info->quota_enabled && is_fstree(ref_root)) { + record = kmalloc(sizeof(*record), GFP_NOFS); + if (!record) + goto free_head_ref; } head_ref->extent_op = extent_op; @@ -814,15 +787,21 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, * insert both the head node and the new ref without dropping * the spin lock */ - head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, - bytenr, num_bytes, action, 0); + head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record, + bytenr, num_bytes, 0, 0, action, 0); add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, - num_bytes, parent, ref_root, level, action, - no_quota); + num_bytes, parent, ref_root, level, action); spin_unlock(&delayed_refs->lock); return 0; + +free_head_ref: + kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); +free_ref: + kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); + + return -ENOMEM; } /* @@ -832,16 +811,13 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, - u64 owner, u64 offset, int action, - struct btrfs_delayed_extent_op *extent_op, - int no_quota) + u64 owner, u64 offset, u64 reserved, int action, + struct btrfs_delayed_extent_op *extent_op) { struct btrfs_delayed_data_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; - - if (!is_fstree(ref_root) || !fs_info->quota_enabled) - no_quota = 0; + struct btrfs_qgroup_extent_record *record = NULL; BUG_ON(extent_op && !extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); @@ -854,6 +830,16 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, return -ENOMEM; } + if (fs_info->quota_enabled && is_fstree(ref_root)) { + record = kmalloc(sizeof(*record), GFP_NOFS); + if (!record) { + kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); + kmem_cache_free(btrfs_delayed_ref_head_cachep, + head_ref); + return -ENOMEM; + } + } + head_ref->extent_op = extent_op; delayed_refs = &trans->transaction->delayed_refs; @@ -863,17 +849,45 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, * insert both the head node and the new ref without dropping * the spin lock */ - head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, - bytenr, num_bytes, action, 1); + head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record, + bytenr, num_bytes, ref_root, reserved, + action, 1); add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, owner, offset, - action, no_quota); + action); spin_unlock(&delayed_refs->lock); return 0; } +int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info, + struct btrfs_trans_handle *trans, + u64 ref_root, u64 bytenr, u64 num_bytes) +{ + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_head *ref_head; + int ret = 0; + + if (!fs_info->quota_enabled || !is_fstree(ref_root)) + return 0; + + delayed_refs = &trans->transaction->delayed_refs; + + spin_lock(&delayed_refs->lock); + ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0); + if (!ref_head) { + ret = -ENOENT; + goto out; + } + WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root); + ref_head->qgroup_ref_root = ref_root; + ref_head->qgroup_reserved = num_bytes; +out: + spin_unlock(&delayed_refs->lock); + return ret; +} + int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, @@ -891,9 +905,9 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); - add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, - num_bytes, BTRFS_UPDATE_DELAYED_HEAD, - extent_op->is_data); + add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr, + num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD, + extent_op->is_data); spin_unlock(&delayed_refs->lock); return 0;