Upgrade to 4.4.50-rt62
[kvmfornfv.git] / kernel / fs / ext4 / inode.c
index 06bda03..10690e5 100644 (file)
@@ -51,25 +51,31 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
                              struct ext4_inode_info *ei)
 {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-       __u16 csum_lo;
-       __u16 csum_hi = 0;
        __u32 csum;
+       __u16 dummy_csum = 0;
+       int offset = offsetof(struct ext4_inode, i_checksum_lo);
+       unsigned int csum_size = sizeof(dummy_csum);
 
-       csum_lo = le16_to_cpu(raw->i_checksum_lo);
-       raw->i_checksum_lo = 0;
-       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
-           EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
-               csum_hi = le16_to_cpu(raw->i_checksum_hi);
-               raw->i_checksum_hi = 0;
-       }
-
-       csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw,
-                          EXT4_INODE_SIZE(inode->i_sb));
+       csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, offset);
+       csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, csum_size);
+       offset += csum_size;
+       csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
+                          EXT4_GOOD_OLD_INODE_SIZE - offset);
 
-       raw->i_checksum_lo = cpu_to_le16(csum_lo);
-       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
-           EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
-               raw->i_checksum_hi = cpu_to_le16(csum_hi);
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
+               offset = offsetof(struct ext4_inode, i_checksum_hi);
+               csum = ext4_chksum(sbi, csum, (__u8 *)raw +
+                                  EXT4_GOOD_OLD_INODE_SIZE,
+                                  offset - EXT4_GOOD_OLD_INODE_SIZE);
+               if (EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
+                       csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
+                                          csum_size);
+                       offset += csum_size;
+                       csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
+                                          EXT4_INODE_SIZE(inode->i_sb) -
+                                          offset);
+               }
+       }
 
        return csum;
 }
@@ -205,9 +211,9 @@ void ext4_evict_inode(struct inode *inode)
                 * Note that directories do not have this problem because they
                 * don't use page cache.
                 */
-               if (ext4_should_journal_data(inode) &&
-                   (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
-                   inode->i_ino != EXT4_JOURNAL_INO) {
+               if (inode->i_ino != EXT4_JOURNAL_INO &&
+                   ext4_should_journal_data(inode) &&
+                   (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
                        journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
                        tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
 
@@ -2589,13 +2595,36 @@ retry:
                                done = true;
                        }
                }
-               ext4_journal_stop(handle);
+               /*
+                * Caution: If the handle is synchronous,
+                * ext4_journal_stop() can wait for transaction commit
+                * to finish which may depend on writeback of pages to
+                * complete or on page lock to be released.  In that
+                * case, we have to wait until after after we have
+                * submitted all the IO, released page locks we hold,
+                * and dropped io_end reference (for extent conversion
+                * to be able to complete) before stopping the handle.
+                */
+               if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
+                       ext4_journal_stop(handle);
+                       handle = NULL;
+               }
                /* Submit prepared bio */
                ext4_io_submit(&mpd.io_submit);
                /* Unlock pages we didn't use */
                mpage_release_unused_pages(&mpd, give_up_on_write);
-               /* Drop our io_end reference we got from init */
-               ext4_put_io_end(mpd.io_submit.io_end);
+               /*
+                * Drop our io_end reference we got from init. We have
+                * to be careful and use deferred io_end finishing if
+                * we are still holding the transaction as we can
+                * release the last reference to io_end which may end
+                * up doing unwritten extent conversion.
+                */
+               if (handle) {
+                       ext4_put_io_end_defer(mpd.io_submit.io_end);
+                       ext4_journal_stop(handle);
+               } else
+                       ext4_put_io_end(mpd.io_submit.io_end);
 
                if (ret == -ENOSPC && sbi->s_journal) {
                        /*
@@ -3587,7 +3616,36 @@ int ext4_can_truncate(struct inode *inode)
 }
 
 /*
- * ext4_punch_hole: punches a hole in a file by releaseing the blocks
+ * We have to make sure i_disksize gets properly updated before we truncate
+ * page cache due to hole punching or zero range. Otherwise i_disksize update
+ * can get lost as it may have been postponed to submission of writeback but
+ * that will never happen after we truncate page cache.
+ */
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+                                     loff_t len)
+{
+       handle_t *handle;
+       loff_t size = i_size_read(inode);
+
+       WARN_ON(!mutex_is_locked(&inode->i_mutex));
+       if (offset > size || offset + len < size)
+               return 0;
+
+       if (EXT4_I(inode)->i_disksize >= size)
+               return 0;
+
+       handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+       ext4_update_i_disksize(inode, size);
+       ext4_mark_inode_dirty(handle, inode);
+       ext4_journal_stop(handle);
+
+       return 0;
+}
+
+/*
+ * ext4_punch_hole: punches a hole in a file by releasing the blocks
  * associated with the given offset and length
  *
  * @inode:  File inode
@@ -3616,7 +3674,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
         * Write out all dirty pages to avoid race conditions
         * Then release them.
         */
-       if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+       if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
                ret = filemap_write_and_wait_range(mapping, offset,
                                                   offset + length - 1);
                if (ret)
@@ -3651,17 +3709,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
        }
 
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
+       /*
+        * Prevent page faults from reinstantiating pages we have released from
+        * page cache.
+        */
+       down_write(&EXT4_I(inode)->i_mmap_sem);
        first_block_offset = round_up(offset, sb->s_blocksize);
        last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 
        /* Now release the pages and zero block aligned part of pages*/
-       if (last_block_offset > first_block_offset)
+       if (last_block_offset > first_block_offset) {
+               ret = ext4_update_disksize_before_punch(inode, offset, length);
+               if (ret)
+                       goto out_dio;
                truncate_pagecache_range(inode, first_block_offset,
                                         last_block_offset);
-
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
-       inode_dio_wait(inode);
+       }
 
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                credits = ext4_writepage_trans_blocks(inode);
@@ -3708,16 +3775,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
 
-       /* Now release the pages again to reduce race window */
-       if (last_block_offset > first_block_offset)
-               truncate_pagecache_range(inode, first_block_offset,
-                                        last_block_offset);
-
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 out_stop:
        ext4_journal_stop(handle);
 out_dio:
+       up_write(&EXT4_I(inode)->i_mmap_sem);
        ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
        mutex_unlock(&inode->i_mutex);
@@ -4112,6 +4175,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        struct inode *inode;
        journal_t *journal = EXT4_SB(sb)->s_journal;
        long ret;
+       loff_t size;
        int block;
        uid_t i_uid;
        gid_t i_gid;
@@ -4203,6 +4267,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                ei->i_file_acl |=
                        ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
        inode->i_size = ext4_isize(raw_inode);
+       if ((size = i_size_read(inode)) < 0) {
+               EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
+               ret = -EFSCORRUPTED;
+               goto bad_inode;
+       }
        ei->i_disksize = inode->i_size;
 #ifdef CONFIG_QUOTA
        ei->i_reserved_quota = 0;
@@ -4486,14 +4555,14 @@ static int ext4_do_update_inode(handle_t *handle,
  * Fix up interoperability with old kernels. Otherwise, old inodes get
  * re-used with the upper 16 bits of the uid/gid intact
  */
-               if (!ei->i_dtime) {
+               if (ei->i_dtime && list_empty(&ei->i_orphan)) {
+                       raw_inode->i_uid_high = 0;
+                       raw_inode->i_gid_high = 0;
+               } else {
                        raw_inode->i_uid_high =
                                cpu_to_le16(high_16_bits(i_uid));
                        raw_inode->i_gid_high =
                                cpu_to_le16(high_16_bits(i_gid));
-               } else {
-                       raw_inode->i_uid_high = 0;
-                       raw_inode->i_gid_high = 0;
                }
        } else {
                raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
@@ -4851,6 +4920,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        } else
                                ext4_wait_for_tail_page_commit(inode);
                }
+               down_write(&EXT4_I(inode)->i_mmap_sem);
                /*
                 * Truncate pagecache after we've waited for commit
                 * in data=journal mode to make pages freeable.
@@ -4858,6 +4928,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                truncate_pagecache(inode, inode->i_size);
                if (shrink)
                        ext4_truncate(inode);
+               up_write(&EXT4_I(inode)->i_mmap_sem);
        }
 
        if (!rc) {
@@ -5109,6 +5180,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
        might_sleep();
        trace_ext4_mark_inode_dirty(inode, _RET_IP_);
        err = ext4_reserve_inode_write(handle, inode, &iloc);
+       if (err)
+               return err;
        if (ext4_handle_valid(handle) &&
            EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
            !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
@@ -5125,8 +5198,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
                                                      sbi->s_want_extra_isize,
                                                      iloc, handle);
                        if (ret) {
-                               ext4_set_inode_state(inode,
-                                                    EXT4_STATE_NO_EXPAND);
                                if (mnt_count !=
                                        le16_to_cpu(sbi->s_es->s_mnt_count)) {
                                        ext4_warning(inode->i_sb,
@@ -5139,9 +5210,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
                        }
                }
        }
-       if (!err)
-               err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-       return err;
+       return ext4_mark_iloc_dirty(handle, inode, &iloc);
 }
 
 /*
@@ -5306,6 +5375,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        sb_start_pagefault(inode->i_sb);
        file_update_time(vma->vm_file);
+
+       down_read(&EXT4_I(inode)->i_mmap_sem);
        /* Delalloc case is easy... */
        if (test_opt(inode->i_sb, DELALLOC) &&
            !ext4_should_journal_data(inode) &&
@@ -5375,6 +5446,19 @@ retry_alloc:
 out_ret:
        ret = block_page_mkwrite_return(ret);
 out:
+       up_read(&EXT4_I(inode)->i_mmap_sem);
        sb_end_pagefault(inode->i_sb);
        return ret;
 }
+
+int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       int err;
+
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       err = filemap_fault(vma, vmf);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+
+       return err;
+}