Upgrade to 4.4.50-rt62
[kvmfornfv.git] / kernel / fs / btrfs / tree-log.c
index 323e12c..ee7832e 100644 (file)
@@ -1923,12 +1923,11 @@ static noinline int find_dir_range(struct btrfs_root *root,
 next:
        /* check the next slot in the tree to see if it is a valid item */
        nritems = btrfs_header_nritems(path->nodes[0]);
+       path->slots[0]++;
        if (path->slots[0] >= nritems) {
                ret = btrfs_next_leaf(root, path);
                if (ret)
                        goto out;
-       } else {
-               path->slots[0]++;
        }
 
        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
@@ -2696,14 +2695,12 @@ static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
                                             int index, int error)
 {
        struct btrfs_log_ctx *ctx;
+       struct btrfs_log_ctx *safe;
 
-       if (!error) {
-               INIT_LIST_HEAD(&root->log_ctxs[index]);
-               return;
-       }
-
-       list_for_each_entry(ctx, &root->log_ctxs[index], list)
+       list_for_each_entry_safe(ctx, safe, &root->log_ctxs[index], list) {
+               list_del_init(&ctx->list);
                ctx->log_ret = error;
+       }
 
        INIT_LIST_HEAD(&root->log_ctxs[index]);
 }
@@ -2850,6 +2847,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
        if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
                blk_finish_plug(&plug);
+               list_del_init(&root_log_ctx.list);
                mutex_unlock(&log_root_tree->log_mutex);
                ret = root_log_ctx.log_ret;
                goto out;
@@ -2943,13 +2941,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        mutex_unlock(&root->log_mutex);
 
 out_wake_log_root:
-       /*
-        * We needn't get log_mutex here because we are sure all
-        * the other tasks are blocked.
-        */
+       mutex_lock(&log_root_tree->log_mutex);
        btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
 
-       mutex_lock(&log_root_tree->log_mutex);
        log_root_tree->log_transid_committed++;
        atomic_set(&log_root_tree->log_commit[index2], 0);
        mutex_unlock(&log_root_tree->log_mutex);
@@ -2960,10 +2954,8 @@ out_wake_log_root:
        if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
                wake_up(&log_root_tree->log_commit_wait[index2]);
 out:
-       /* See above. */
-       btrfs_remove_all_log_ctxs(root, index1, ret);
-
        mutex_lock(&root->log_mutex);
+       btrfs_remove_all_log_ctxs(root, index1, ret);
        root->log_transid_committed++;
        atomic_set(&root->log_commit[index1], 0);
        mutex_unlock(&root->log_mutex);
@@ -4406,6 +4398,127 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+/*
+ * When we are logging a new inode X, check whether it has a reference that
+ * matches a reference of some other inode Y that was created in a past
+ * transaction and renamed in the current one. If we don't do this, then at
+ * log replay time we can lose inode Y (and all its files if it's a directory):
+ *
+ * mkdir /mnt/x
+ * echo "hello world" > /mnt/x/foobar
+ * sync
+ * mv /mnt/x /mnt/y
+ * mkdir /mnt/x                 # or touch /mnt/x
+ * xfs_io -c fsync /mnt/x
+ * <power fail>
+ * mount fs, trigger log replay
+ *
+ * After the log replay procedure, we would lose the first directory and all its
+ * files (file foobar).
+ * For the case where inode Y is not a directory we simply end up losing it:
+ *
+ * echo "123" > /mnt/foo
+ * sync
+ * mv /mnt/foo /mnt/bar
+ * echo "abc" > /mnt/foo
+ * xfs_io -c fsync /mnt/foo
+ * <power fail>
+ *
+ * We also need this for cases where a snapshot entry is replaced by some other
+ * entry (file or directory) otherwise we end up with an unreplayable log due to
+ * attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as
+ * if it were a regular entry:
+ *
+ * mkdir /mnt/x
+ * btrfs subvolume snapshot /mnt /mnt/x/snap
+ * btrfs subvolume delete /mnt/x/snap
+ * rmdir /mnt/x
+ * mkdir /mnt/x
+ * fsync /mnt/x or fsync some new file inside it
+ * <power fail>
+ *
+ * The snapshot delete, rmdir of x, mkdir of a new x and the fsync all happen in
+ * the same transaction.
+ */
+static int btrfs_check_ref_name_override(struct extent_buffer *eb,
+                                        const int slot,
+                                        const struct btrfs_key *key,
+                                        struct inode *inode)
+{
+       int ret;        /* 1: a lookup returned an item, 0: none did, < 0: errno */
+       struct btrfs_path *search_path;
+       char *name = NULL;      /* scratch buffer reused for every name */
+       u32 name_len = 0;       /* current capacity of name, in bytes */
+       u32 item_size = btrfs_item_size_nr(eb, slot);
+       u32 cur_offset = 0;     /* offset of the current entry in the item */
+       unsigned long ptr = btrfs_item_ptr_offset(eb, slot);
+
+       search_path = btrfs_alloc_path();
+       if (!search_path)
+               return -ENOMEM;
+       search_path->search_commit_root = 1;
+       search_path->skip_locking = 1;
+
+       while (cur_offset < item_size) {        /* one ref entry per pass */
+               u64 parent;
+               u32 this_name_len;
+               u32 this_len;
+               unsigned long name_ptr;
+               struct btrfs_dir_item *di;
+
+               if (key->type == BTRFS_INODE_REF_KEY) {
+                       struct btrfs_inode_ref *iref;
+
+                       iref = (struct btrfs_inode_ref *)(ptr + cur_offset);
+                       parent = key->offset;   /* parent comes from the key */
+                       this_name_len = btrfs_inode_ref_name_len(eb, iref);
+                       name_ptr = (unsigned long)(iref + 1);   /* name follows the ref header */
+                       this_len = sizeof(*iref) + this_name_len;
+               } else {        /* extref: parent is read from each entry */
+                       struct btrfs_inode_extref *extref;
+
+                       extref = (struct btrfs_inode_extref *)(ptr +
+                                                              cur_offset);
+                       parent = btrfs_inode_extref_parent(eb, extref);
+                       this_name_len = btrfs_inode_extref_name_len(eb, extref);
+                       name_ptr = (unsigned long)&extref->name;
+                       this_len = sizeof(*extref) + this_name_len;
+               }
+
+               if (this_name_len > name_len) { /* grow only when needed */
+                       char *new_name;
+
+                       new_name = krealloc(name, this_name_len, GFP_NOFS);
+                       if (!new_name) {
+                               ret = -ENOMEM;
+                               goto out;       /* old buffer is freed at out */
+                       }
+                       name_len = this_name_len;
+                       name = new_name;
+               }
+
+               read_extent_buffer(eb, name, name_ptr, this_name_len);
+               di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root,
+                                          search_path, parent,
+                                          name, this_name_len, 0);
+               if (di && !IS_ERR(di)) {        /* lookup returned an item */
+                       ret = 1;
+                       goto out;
+               } else if (IS_ERR(di)) {        /* lookup itself failed */
+                       ret = PTR_ERR(di);
+                       goto out;
+               }
+               btrfs_release_path(search_path);        /* reuse path for next name */
+
+               cur_offset += this_len; /* step over entry header + name */
+       }
+       ret = 0;        /* no lookup returned an item */
+out:
+       btrfs_free_path(search_path);
+       kfree(name);
+       return ret;
+}
+
 /* log a single inode in the tree log.
  * At least one parent directory for this inode must exist in the tree
  * or be logged already.
@@ -4578,6 +4691,22 @@ again:
                if (min_key.type == BTRFS_INODE_ITEM_KEY)
                        need_log_inode_item = false;
 
+               if ((min_key.type == BTRFS_INODE_REF_KEY ||
+                    min_key.type == BTRFS_INODE_EXTREF_KEY) &&
+                   BTRFS_I(inode)->generation == trans->transid) {
+                       ret = btrfs_check_ref_name_override(path->nodes[0],
+                                                           path->slots[0],
+                                                           &min_key, inode);
+                       if (ret < 0) {
+                               err = ret;
+                               goto out_unlock;
+                       } else if (ret > 0) {
+                               err = 1;
+                               btrfs_set_log_full_commit(root->fs_info, trans);
+                               goto out_unlock;
+                       }
+               }
+
                /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
                if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
                        if (ins_nr == 0)