These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / fs / ext4 / extents.c
index 87ba10d..551353b 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <linux/fiemap.h>
+#include <linux/backing-dev.h>
 #include "ext4_jbd2.h"
 #include "ext4_extents.h"
 #include "xattr.h"
@@ -441,7 +442,7 @@ static int __ext4_ext_check(const char *function, unsigned int line,
                            int depth, ext4_fsblk_t pblk)
 {
        const char *error_msg;
-       int max = 0;
+       int max = 0, err = -EFSCORRUPTED;
 
        if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
                error_msg = "invalid magic";
@@ -472,6 +473,7 @@ static int __ext4_ext_check(const char *function, unsigned int line,
        if (ext_depth(inode) != depth &&
            !ext4_extent_block_csum_verify(inode, eh)) {
                error_msg = "extent tree corrupted";
+               err = -EFSBADCRC;
                goto corrupted;
        }
        return 0;
@@ -484,7 +486,7 @@ corrupted:
                         le16_to_cpu(eh->eh_magic),
                         le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
                         max, le16_to_cpu(eh->eh_depth), depth);
-       return -EIO;
+       return err;
 }
 
 #define ext4_ext_check(inode, eh, depth, pblk)                 \
@@ -898,7 +900,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
 
                bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
                                            flags);
-               if (unlikely(IS_ERR(bh))) {
+               if (IS_ERR(bh)) {
                        ret = PTR_ERR(bh);
                        goto err;
                }
@@ -909,7 +911,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
                        put_bh(bh);
                        EXT4_ERROR_INODE(inode,
                                         "ppos %d > depth %d", ppos, depth);
-                       ret = -EIO;
+                       ret = -EFSCORRUPTED;
                        goto err;
                }
                path[ppos].p_bh = bh;
@@ -958,7 +960,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
                EXT4_ERROR_INODE(inode,
                                 "logical %d == ei_block %d!",
                                 logical, le32_to_cpu(curp->p_idx->ei_block));
-               return -EIO;
+               return -EFSCORRUPTED;
        }
 
        if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
@@ -967,7 +969,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
                                 "eh_entries %d >= eh_max %d!",
                                 le16_to_cpu(curp->p_hdr->eh_entries),
                                 le16_to_cpu(curp->p_hdr->eh_max));
-               return -EIO;
+               return -EFSCORRUPTED;
        }
 
        if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
@@ -991,7 +993,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 
        if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
                EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
-               return -EIO;
+               return -EFSCORRUPTED;
        }
 
        ix->ei_block = cpu_to_le32(logical);
@@ -1000,7 +1002,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 
        if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
                EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
-               return -EIO;
+               return -EFSCORRUPTED;
        }
 
        err = ext4_ext_dirty(handle, inode, curp);
@@ -1041,7 +1043,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
         * border from split point */
        if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
                EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
-               return -EIO;
+               return -EFSCORRUPTED;
        }
        if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
                border = path[depth].p_ext[1].ee_block;
@@ -1085,7 +1087,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
        newblock = ablocks[--a];
        if (unlikely(newblock == 0)) {
                EXT4_ERROR_INODE(inode, "newblock == 0!");
-               err = -EIO;
+               err = -EFSCORRUPTED;
                goto cleanup;
        }
        bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
@@ -1111,7 +1113,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
                                 path[depth].p_hdr->eh_entries,
                                 path[depth].p_hdr->eh_max);
-               err = -EIO;
+               err = -EFSCORRUPTED;
                goto cleanup;
        }
        /* start copy from next extent */
@@ -1150,7 +1152,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
        k = depth - at - 1;
        if (unlikely(k < 0)) {
                EXT4_ERROR_INODE(inode, "k %d < 0!", k);
-               err = -EIO;
+               err = -EFSCORRUPTED;
                goto cleanup;
        }
        if (k)
@@ -1190,7 +1192,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                        EXT4_ERROR_INODE(inode,
                                         "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
                                         le32_to_cpu(path[i].p_ext->ee_block));
-                       err = -EIO;
+                       err = -EFSCORRUPTED;
                        goto cleanup;
                }
                /* start copy indexes */
@@ -1424,7 +1426,7 @@ static int ext4_ext_search_left(struct inode *inode,
 
        if (unlikely(path == NULL)) {
                EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
-               return -EIO;
+               return -EFSCORRUPTED;
        }
        depth = path->p_depth;
        *phys = 0;
@@ -1443,7 +1445,7 @@ static int ext4_ext_search_left(struct inode *inode,
                        EXT4_ERROR_INODE(inode,
                                         "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
                                         *logical, le32_to_cpu(ex->ee_block));
-                       return -EIO;
+                       return -EFSCORRUPTED;
                }
                while (--depth >= 0) {
                        ix = path[depth].p_idx;
@@ -1454,7 +1456,7 @@ static int ext4_ext_search_left(struct inode *inode,
                                  EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
                le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
                                  depth);
-                               return -EIO;
+                               return -EFSCORRUPTED;
                        }
                }
                return 0;
@@ -1464,7 +1466,7 @@ static int ext4_ext_search_left(struct inode *inode,
                EXT4_ERROR_INODE(inode,
                                 "logical %d < ee_block %d + ee_len %d!",
                                 *logical, le32_to_cpu(ex->ee_block), ee_len);
-               return -EIO;
+               return -EFSCORRUPTED;
        }
 
        *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
@@ -1494,7 +1496,7 @@ static int ext4_ext_search_right(struct inode *inode,
 
        if (unlikely(path == NULL)) {
                EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
-               return -EIO;
+               return -EFSCORRUPTED;
        }
        depth = path->p_depth;
        *phys = 0;
@@ -1513,7 +1515,7 @@ static int ext4_ext_search_right(struct inode *inode,
                        EXT4_ERROR_INODE(inode,
                                         "first_extent(path[%d].p_hdr) != ex",
                                         depth);
-                       return -EIO;
+                       return -EFSCORRUPTED;
                }
                while (--depth >= 0) {
                        ix = path[depth].p_idx;
@@ -1521,7 +1523,7 @@ static int ext4_ext_search_right(struct inode *inode,
                                EXT4_ERROR_INODE(inode,
                                                 "ix != EXT_FIRST_INDEX *logical %d!",
                                                 *logical);
-                               return -EIO;
+                               return -EFSCORRUPTED;
                        }
                }
                goto found_extent;
@@ -1531,7 +1533,7 @@ static int ext4_ext_search_right(struct inode *inode,
                EXT4_ERROR_INODE(inode,
                                 "logical %d < ee_block %d + ee_len %d!",
                                 *logical, le32_to_cpu(ex->ee_block), ee_len);
-               return -EIO;
+               return -EFSCORRUPTED;
        }
 
        if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
@@ -1669,7 +1671,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
        if (unlikely(ex == NULL || eh == NULL)) {
                EXT4_ERROR_INODE(inode,
                                 "ex %p == NULL or eh %p == NULL", ex, eh);
-               return -EIO;
+               return -EFSCORRUPTED;
        }
 
        if (depth == 0) {
@@ -1937,14 +1939,14 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
                mb_flags |= EXT4_MB_DELALLOC_RESERVED;
        if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
                EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
-               return -EIO;
+               return -EFSCORRUPTED;
        }
        depth = ext_depth(inode);
        ex = path[depth].p_ext;
        eh = path[depth].p_hdr;
        if (unlikely(path[depth].p_hdr == NULL)) {
                EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
-               return -EIO;
+               return -EFSCORRUPTED;
        }
 
        /* try to insert block into found extent and return */
@@ -2171,7 +2173,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
                if (unlikely(path[depth].p_hdr == NULL)) {
                        up_read(&EXT4_I(inode)->i_data_sem);
                        EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
-                       err = -EIO;
+                       err = -EFSCORRUPTED;
                        break;
                }
                ex = path[depth].p_ext;
@@ -2240,7 +2242,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
 
                if (unlikely(es.es_len == 0)) {
                        EXT4_ERROR_INODE(inode, "es.es_len == 0");
-                       err = -EIO;
+                       err = -EFSCORRUPTED;
                        break;
                }
 
@@ -2263,7 +2265,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
                                                 "next extent == %u, next "
                                                 "delalloc extent = %u",
                                                 next, next_del);
-                               err = -EIO;
+                               err = -EFSCORRUPTED;
                                break;
                        }
                }
@@ -2362,7 +2364,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
        leaf = ext4_idx_pblock(path->p_idx);
        if (unlikely(path->p_hdr->eh_entries == 0)) {
                EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
-               return -EIO;
+               return -EFSCORRUPTED;
        }
        err = ext4_ext_get_access(handle, inode, path);
        if (err)
@@ -2611,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
        eh = path[depth].p_hdr;
        if (unlikely(path[depth].p_hdr == NULL)) {
                EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
-               return -EIO;
+               return -EFSCORRUPTED;
        }
        /* find where to start removing */
        ex = path[depth].p_ext;
@@ -2665,7 +2667,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                                         "on extent %u:%u",
                                         start, end, ex_ee_block,
                                         ex_ee_block + ex_ee_len - 1);
-                       err = -EIO;
+                       err = -EFSCORRUPTED;
                        goto out;
                } else if (a != ex_ee_block) {
                        /* remove tail of the extent */
@@ -2840,7 +2842,7 @@ again:
                                EXT4_ERROR_INODE(inode,
                                                 "path[%d].p_hdr == NULL",
                                                 depth);
-                               err = -EIO;
+                               err = -EFSCORRUPTED;
                        }
                        goto out;
                }
@@ -2919,7 +2921,7 @@ again:
                i = 0;
 
                if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
-                       err = -EIO;
+                       err = -EFSCORRUPTED;
                        goto out;
                }
        }
@@ -2977,7 +2979,7 @@ again:
                         * Should be a no-op if we did IO above. */
                        cond_resched();
                        if (WARN_ON(i + 1 > depth)) {
-                               err = -EIO;
+                               err = -EFSCORRUPTED;
                                break;
                        }
                        path[i + 1].p_bh = bh;
@@ -3053,7 +3055,7 @@ void ext4_ext_init(struct super_block *sb)
         * possible initialization would be here
         */
 
-       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+       if (ext4_has_feature_extents(sb)) {
 #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
                printk(KERN_INFO "EXT4-fs: file extents enabled"
 #ifdef AGGRESSIVE_TEST
@@ -3080,7 +3082,7 @@ void ext4_ext_init(struct super_block *sb)
  */
 void ext4_ext_release(struct super_block *sb)
 {
-       if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
+       if (!ext4_has_feature_extents(sb))
                return;
 
 #ifdef EXTENTS_STATS
@@ -3344,7 +3346,7 @@ static int ext4_split_extent(handle_t *handle,
        if (!ex) {
                EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
                                 (unsigned long) map->m_lblk);
-               return -EIO;
+               return -EFSCORRUPTED;
        }
        unwritten = ext4_ext_is_unwritten(ex);
        split_flag1 = 0;
@@ -3557,6 +3559,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                max_zeroout = sbi->s_extent_max_zeroout_kb >>
                        (inode->i_sb->s_blocksize_bits - 10);
 
+       if (ext4_encrypted_inode(inode))
+               max_zeroout = 0;
+
        /* If extent is less than s_max_zeroout_kb, zeroout directly */
        if (max_zeroout && (ee_len <= max_zeroout)) {
                err = ext4_ext_zeroout(inode, ex);
@@ -3969,7 +3974,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
                if (!ex) {
                        EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
                                         (unsigned long) map->m_lblk);
-                       return -EIO;
+                       return -EFSCORRUPTED;
                }
        }
 
@@ -4307,7 +4312,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                                 "lblock: %lu, depth: %d pblock %lld",
                                 (unsigned long) map->m_lblk, depth,
                                 path[depth].p_block);
-               err = -EIO;
+               err = -EFSCORRUPTED;
                goto out2;
        }
 
@@ -4456,6 +4461,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                ar.flags |= EXT4_MB_HINT_NOPREALLOC;
        if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
                ar.flags |= EXT4_MB_DELALLOC_RESERVED;
+       if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
+               ar.flags |= EXT4_MB_USE_RESERVED;
        newblock = ext4_mb_new_blocks(handle, &ar, &err);
        if (!newblock)
                goto out2;
@@ -4663,6 +4670,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        int ret = 0;
        int ret2 = 0;
        int retries = 0;
+       int depth = 0;
        struct ext4_map_blocks map;
        unsigned int credits;
        loff_t epos;
@@ -4677,13 +4685,32 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        if (len <= EXT_UNWRITTEN_MAX_LEN)
                flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 
+       /* Wait for all existing dio workers; newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
        /*
         * credits to insert 1 extent into extent tree
         */
        credits = ext4_chunk_trans_blocks(inode, len);
+       /*
+        * We can only call ext_depth() on extent based inodes
+        */
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               depth = ext_depth(inode);
+       else
+               depth = -1;
 
 retry:
        while (ret >= 0 && len) {
+               /*
+                * Recalculate credits when extent tree depth changes.
+                */
+               if (depth >= 0 && depth != ext_depth(inode)) {
+                       credits = ext4_chunk_trans_blocks(inode, len);
+                       depth = ext_depth(inode);
+               }
+
                handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
                                            credits);
                if (IS_ERR(handle)) {
@@ -4725,6 +4752,8 @@ retry:
                goto retry;
        }
 
+       ext4_inode_resume_unlocked_dio(inode);
+
        return ret > 0 ? ret2 : ret;
 }
 
@@ -4912,12 +4941,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
         * bug we should fix....
         */
        if (ext4_encrypted_inode(inode) &&
-           (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)))
+           (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE |
+                    FALLOC_FL_ZERO_RANGE)))
                return -EOPNOTSUPP;
 
        /* Return error if mode is not supported */
        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
+                    FALLOC_FL_INSERT_RANGE))
                return -EOPNOTSUPP;
 
        if (mode & FALLOC_FL_PUNCH_HOLE)
@@ -4930,6 +4961,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (mode & FALLOC_FL_COLLAPSE_RANGE)
                return ext4_collapse_range(inode, offset, len);
 
+       if (mode & FALLOC_FL_INSERT_RANGE)
+               return ext4_insert_range(inode, offset, len);
+
        if (mode & FALLOC_FL_ZERO_RANGE)
                return ext4_zero_range(file, offset, len, mode);
 
@@ -5224,13 +5258,13 @@ ext4_access_path(handle_t *handle, struct inode *inode,
 /*
  * ext4_ext_shift_path_extents:
  * Shift the extents of a path structure lying between path[depth].p_ext
- * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift
- * from starting block for each extent.
+ * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells
+ * if it is right shift or left shift operation.
  */
 static int
 ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                            struct inode *inode, handle_t *handle,
-                           ext4_lblk_t *start)
+                           enum SHIFT_DIRECTION SHIFT)
 {
        int depth, err = 0;
        struct ext4_extent *ex_start, *ex_last;
@@ -5241,7 +5275,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                if (depth == path->p_depth) {
                        ex_start = path[depth].p_ext;
                        if (!ex_start)
-                               return -EIO;
+                               return -EFSCORRUPTED;
 
                        ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
 
@@ -5252,19 +5286,25 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                        if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
                                update = 1;
 
-                       *start = le32_to_cpu(ex_last->ee_block) +
-                               ext4_ext_get_actual_len(ex_last);
-
                        while (ex_start <= ex_last) {
-                               le32_add_cpu(&ex_start->ee_block, -shift);
-                               /* Try to merge to the left. */
-                               if ((ex_start >
-                                    EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
-                                   ext4_ext_try_to_merge_right(inode,
-                                                       path, ex_start - 1))
+                               if (SHIFT == SHIFT_LEFT) {
+                                       le32_add_cpu(&ex_start->ee_block,
+                                               -shift);
+                                       /* Try to merge to the left. */
+                                       if ((ex_start >
+                                           EXT_FIRST_EXTENT(path[depth].p_hdr))
+                                           &&
+                                           ext4_ext_try_to_merge_right(inode,
+                                           path, ex_start - 1))
+                                               ex_last--;
+                                       else
+                                               ex_start++;
+                               } else {
+                                       le32_add_cpu(&ex_last->ee_block, shift);
+                                       ext4_ext_try_to_merge_right(inode, path,
+                                               ex_last);
                                        ex_last--;
-                               else
-                                       ex_start++;
+                               }
                        }
                        err = ext4_ext_dirty(handle, inode, path + depth);
                        if (err)
@@ -5279,7 +5319,10 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                if (err)
                        goto out;
 
-               le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
+               if (SHIFT == SHIFT_LEFT)
+                       le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
+               else
+                       le32_add_cpu(&path[depth].p_idx->ei_block, shift);
                err = ext4_ext_dirty(handle, inode, path + depth);
                if (err)
                        goto out;
@@ -5297,19 +5340,20 @@ out:
 
 /*
  * ext4_ext_shift_extents:
- * All the extents which lies in the range from start to the last allocated
- * block for the file are shifted downwards by shift blocks.
+ * All the extents which lie in the range from @start to the last allocated
+ * block for the @inode are shifted either towards left or right (depending
+ * upon @SHIFT) by @shift blocks.
  * On success, 0 is returned, error otherwise.
  */
 static int
 ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
-                      ext4_lblk_t start, ext4_lblk_t shift)
+                      ext4_lblk_t start, ext4_lblk_t shift,
+                      enum SHIFT_DIRECTION SHIFT)
 {
        struct ext4_ext_path *path;
        int ret = 0, depth;
        struct ext4_extent *extent;
-       ext4_lblk_t stop_block;
-       ext4_lblk_t ex_start, ex_end;
+       ext4_lblk_t stop, *iterator, ex_start, ex_end;
 
        /* Let path point to the last extent */
        path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
@@ -5321,58 +5365,84 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
        if (!extent)
                goto out;
 
-       stop_block = le32_to_cpu(extent->ee_block) +
+       stop = le32_to_cpu(extent->ee_block) +
                        ext4_ext_get_actual_len(extent);
 
-       /* Nothing to shift, if hole is at the end of file */
-       if (start >= stop_block)
-               goto out;
+       /*
+        * In case of left shift, don't start shifting extents until we make
+        * sure the hole is big enough to accommodate the shift.
+        */
+       if (SHIFT == SHIFT_LEFT) {
+               path = ext4_find_extent(inode, start - 1, &path, 0);
+               if (IS_ERR(path))
+                       return PTR_ERR(path);
+               depth = path->p_depth;
+               extent =  path[depth].p_ext;
+               if (extent) {
+                       ex_start = le32_to_cpu(extent->ee_block);
+                       ex_end = le32_to_cpu(extent->ee_block) +
+                               ext4_ext_get_actual_len(extent);
+               } else {
+                       ex_start = 0;
+                       ex_end = 0;
+               }
 
-       /*
-        * Don't start shifting extents until we make sure the hole is big
-        * enough to accomodate the shift.
-        */
-       path = ext4_find_extent(inode, start - 1, &path, 0);
-       if (IS_ERR(path))
-               return PTR_ERR(path);
-       depth = path->p_depth;
-       extent =  path[depth].p_ext;
-       if (extent) {
-               ex_start = le32_to_cpu(extent->ee_block);
-               ex_end = le32_to_cpu(extent->ee_block) +
-                       ext4_ext_get_actual_len(extent);
-       } else {
-               ex_start = 0;
-               ex_end = 0;
+               if ((start == ex_start && shift > ex_start) ||
+                   (shift > start - ex_end)) {
+                       ext4_ext_drop_refs(path);
+                       kfree(path);
+                       return -EINVAL;
+               }
        }
 
-       if ((start == ex_start && shift > ex_start) ||
-           (shift > start - ex_end))
-               return -EINVAL;
+       /*
+        * In case of left shift, iterator points to start and it is increased
+        * till we reach stop. In case of right shift, iterator points to stop
+        * and it is decreased till we reach start.
+        */
+       if (SHIFT == SHIFT_LEFT)
+               iterator = &start;
+       else
+               iterator = &stop;
 
        /* Its safe to start updating extents */
-       while (start < stop_block) {
-               path = ext4_find_extent(inode, start, &path, 0);
+       while (start < stop) {
+               path = ext4_find_extent(inode, *iterator, &path, 0);
                if (IS_ERR(path))
                        return PTR_ERR(path);
                depth = path->p_depth;
                extent = path[depth].p_ext;
                if (!extent) {
                        EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
-                                        (unsigned long) start);
-                       return -EIO;
+                                        (unsigned long) *iterator);
+                       return -EFSCORRUPTED;
                }
-               if (start > le32_to_cpu(extent->ee_block)) {
+               if (SHIFT == SHIFT_LEFT && *iterator >
+                   le32_to_cpu(extent->ee_block)) {
                        /* Hole, move to the next extent */
                        if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
                                path[depth].p_ext++;
                        } else {
-                               start = ext4_ext_next_allocated_block(path);
+                               *iterator = ext4_ext_next_allocated_block(path);
                                continue;
                        }
                }
+
+               if (SHIFT == SHIFT_LEFT) {
+                       extent = EXT_LAST_EXTENT(path[depth].p_hdr);
+                       *iterator = le32_to_cpu(extent->ee_block) +
+                                       ext4_ext_get_actual_len(extent);
+               } else {
+                       extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
+                       *iterator =  le32_to_cpu(extent->ee_block) > 0 ?
+                               le32_to_cpu(extent->ee_block) - 1 : 0;
+                       /* Update path extent in case we need to stop */
+                       while (le32_to_cpu(extent->ee_block) < start)
+                               extent++;
+                       path[depth].p_ext = extent;
+               }
                ret = ext4_ext_shift_path_extents(path, shift, inode,
-                               handle, &start);
+                               handle, SHIFT);
                if (ret)
                        break;
        }
@@ -5485,7 +5555,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        ext4_discard_preallocations(inode);
 
        ret = ext4_ext_shift_extents(inode, handle, punch_stop,
-                                    punch_stop - punch_start);
+                                    punch_stop - punch_start, SHIFT_LEFT);
        if (ret) {
                up_write(&EXT4_I(inode)->i_data_sem);
                goto out_stop;
@@ -5510,6 +5580,174 @@ out_mutex:
        return ret;
 }
 
+/*
+ * ext4_insert_range:
+ * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
+ * The data blocks starting from @offset to the EOF are shifted by @len
+ * towards right to create a hole in the @inode. Inode size is increased
+ * by @len bytes. @offset and @len must be cluster aligned, @offset < i_size.
+ * Returns 0 on success, negative error code otherwise.
+ */
+int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+{
+       struct super_block *sb = inode->i_sb;
+       handle_t *handle;
+       struct ext4_ext_path *path;
+       struct ext4_extent *extent;
+       ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
+       unsigned int credits, ee_len;
+       int ret = 0, depth, split_flag = 0;
+       loff_t ioffset;
+
+       /*
+        * We need to test this early because xfstests assumes that an
+        * insert range of (0, 1) will return EOPNOTSUPP if the file
+        * system does not support insert range.
+        */
+       if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               return -EOPNOTSUPP;
+
+       /* Insert range works only on fs cluster size aligned regions. */
+       if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
+                       len & (EXT4_CLUSTER_SIZE(sb) - 1))
+               return -EINVAL;
+
+       if (!S_ISREG(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       trace_ext4_insert_range(inode, offset, len);
+
+       offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
+       len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
+
+       /* Call ext4_force_commit to flush all data in case of data=journal */
+       if (ext4_should_journal_data(inode)) {
+               ret = ext4_force_commit(inode->i_sb);
+               if (ret)
+                       return ret;
+       }
+
+       /*
+        * Need to round down to align start offset to page size boundary
+        * for page size > block size.
+        */
+       ioffset = round_down(offset, PAGE_SIZE);
+
+       /* Write out all dirty pages */
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+                       LLONG_MAX);
+       if (ret)
+               return ret;
+
+       mutex_lock(&inode->i_mutex);
+
+       /* Recheck under i_mutex: currently just for extent based files */
+       if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+               ret = -EOPNOTSUPP;
+               goto out_mutex;
+       }
+
+       /* Exceeds max file size? Compare by subtraction to avoid overflow */
+       if (len > inode->i_sb->s_maxbytes - inode->i_size) {
+               ret = -EFBIG;
+               goto out_mutex;
+       }
+
+       /* Offset should be less than i_size */
+       if (offset >= i_size_read(inode)) {
+               ret = -EINVAL;
+               goto out_mutex;
+       }
+
+       truncate_pagecache(inode, ioffset);
+
+       /* Wait for existing dio to complete */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
+       credits = ext4_writepage_trans_blocks(inode);
+       handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+       if (IS_ERR(handle)) {
+               ret = PTR_ERR(handle);
+               goto out_dio;
+       }
+
+       /* Expand file to avoid data loss if there is error while shifting */
+       inode->i_size += len;
+       EXT4_I(inode)->i_disksize += len;
+       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+       ret = ext4_mark_inode_dirty(handle, inode);
+       if (ret)
+               goto out_stop;
+
+       down_write(&EXT4_I(inode)->i_data_sem);
+       ext4_discard_preallocations(inode);
+
+       path = ext4_find_extent(inode, offset_lblk, NULL, 0);
+       if (IS_ERR(path)) {
+               ret = PTR_ERR(path);
+               up_write(&EXT4_I(inode)->i_data_sem);
+               goto out_stop;
+       }
+
+       depth = ext_depth(inode);
+       extent = path[depth].p_ext;
+       if (extent) {
+               ee_start_lblk = le32_to_cpu(extent->ee_block);
+               ee_len = ext4_ext_get_actual_len(extent);
+
+               /*
+                * If offset_lblk is not the starting block of extent, split
+                * the extent @offset_lblk
+                */
+               if ((offset_lblk > ee_start_lblk) &&
+                               (offset_lblk < (ee_start_lblk + ee_len))) {
+                       if (ext4_ext_is_unwritten(extent))
+                               split_flag = EXT4_EXT_MARK_UNWRIT1 |
+                                       EXT4_EXT_MARK_UNWRIT2;
+                       ret = ext4_split_extent_at(handle, inode, &path,
+                                       offset_lblk, split_flag,
+                                       EXT4_EX_NOCACHE |
+                                       EXT4_GET_BLOCKS_PRE_IO |
+                                       EXT4_GET_BLOCKS_METADATA_NOFAIL);
+               }
+       }
+       /* Release the path even when no extent was found (p_ext == NULL) */
+       ext4_ext_drop_refs(path);
+       kfree(path);
+       if (ret < 0) {
+               up_write(&EXT4_I(inode)->i_data_sem);
+               goto out_stop;
+       }
+
+       ret = ext4_es_remove_extent(inode, offset_lblk,
+                       EXT_MAX_BLOCKS - offset_lblk);
+       if (ret) {
+               up_write(&EXT4_I(inode)->i_data_sem);
+               goto out_stop;
+       }
+
+       /*
+        * if offset_lblk lies in a hole which is at start of file, use
+        * ee_start_lblk to shift extents
+        */
+       ret = ext4_ext_shift_extents(inode, handle,
+               ee_start_lblk > offset_lblk ? ee_start_lblk : offset_lblk,
+               len_lblk, SHIFT_RIGHT);
+
+       up_write(&EXT4_I(inode)->i_data_sem);
+       if (IS_SYNC(inode))
+               ext4_handle_sync(handle);
+
+out_stop:
+       ext4_journal_stop(handle);
+out_dio:
+       ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+       mutex_unlock(&inode->i_mutex);
+       return ret;
+}
+
 /**
  * ext4_swap_extents - Swap extents between two inodes
  *
@@ -5542,7 +5780,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
        BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
        BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
        BUG_ON(!mutex_is_locked(&inode1->i_mutex));
-       BUG_ON(!mutex_is_locked(&inode1->i_mutex));
+       BUG_ON(!mutex_is_locked(&inode2->i_mutex));
 
        *erp = ext4_es_remove_extent(inode1, lblk1, count);
        if (unlikely(*erp))
@@ -5558,7 +5796,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
                int split = 0;
 
                path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
-               if (unlikely(IS_ERR(path1))) {
+               if (IS_ERR(path1)) {
                        *erp = PTR_ERR(path1);
                        path1 = NULL;
                finish:
@@ -5566,7 +5804,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
                        goto repeat;
                }
                path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
-               if (unlikely(IS_ERR(path2))) {
+               if (IS_ERR(path2)) {
                        *erp = PTR_ERR(path2);
                        path2 = NULL;
                        goto finish;