X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=kernel%2Ffs%2Fext4%2Finode.c;fp=kernel%2Ffs%2Fext4%2Finode.c;h=10690e5ba2ebd2007de251866036b65931aa2650;hb=52f993b8e89487ec9ee15a7fb4979e0f09a45b27;hp=06bda0361e7ce80dcecdf2245b4922e0af8cffd3;hpb=c189ccac5702322ed843fe17057035b7222a59b6;p=kvmfornfv.git diff --git a/kernel/fs/ext4/inode.c b/kernel/fs/ext4/inode.c index 06bda0361..10690e5ba 100644 --- a/kernel/fs/ext4/inode.c +++ b/kernel/fs/ext4/inode.c @@ -51,25 +51,31 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, struct ext4_inode_info *ei) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - __u16 csum_lo; - __u16 csum_hi = 0; __u32 csum; + __u16 dummy_csum = 0; + int offset = offsetof(struct ext4_inode, i_checksum_lo); + unsigned int csum_size = sizeof(dummy_csum); - csum_lo = le16_to_cpu(raw->i_checksum_lo); - raw->i_checksum_lo = 0; - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && - EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { - csum_hi = le16_to_cpu(raw->i_checksum_hi); - raw->i_checksum_hi = 0; - } - - csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, - EXT4_INODE_SIZE(inode->i_sb)); + csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, offset); + csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, csum_size); + offset += csum_size; + csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, + EXT4_GOOD_OLD_INODE_SIZE - offset); - raw->i_checksum_lo = cpu_to_le16(csum_lo); - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && - EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) - raw->i_checksum_hi = cpu_to_le16(csum_hi); + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { + offset = offsetof(struct ext4_inode, i_checksum_hi); + csum = ext4_chksum(sbi, csum, (__u8 *)raw + + EXT4_GOOD_OLD_INODE_SIZE, + offset - EXT4_GOOD_OLD_INODE_SIZE); + if (EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { + csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, + csum_size); + offset += csum_size; + csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, + EXT4_INODE_SIZE(inode->i_sb) - + offset); + } + } return csum; } @@ -205,9 +211,9 @@ void ext4_evict_inode(struct inode *inode) * Note that directories do not have this problem because they * don't use page cache. */ - if (ext4_should_journal_data(inode) && - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && - inode->i_ino != EXT4_JOURNAL_INO) { + if (inode->i_ino != EXT4_JOURNAL_INO && + ext4_should_journal_data(inode) && + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; @@ -2589,13 +2595,36 @@ retry: done = true; } } - ext4_journal_stop(handle); + /* + * Caution: If the handle is synchronous, + * ext4_journal_stop() can wait for transaction commit + * to finish which may depend on writeback of pages to + * complete or on page lock to be released. In that + * case, we have to wait until after after we have + * submitted all the IO, released page locks we hold, + * and dropped io_end reference (for extent conversion + * to be able to complete) before stopping the handle. + */ + if (!ext4_handle_valid(handle) || handle->h_sync == 0) { + ext4_journal_stop(handle); + handle = NULL; + } /* Submit prepared bio */ ext4_io_submit(&mpd.io_submit); /* Unlock pages we didn't use */ mpage_release_unused_pages(&mpd, give_up_on_write); - /* Drop our io_end reference we got from init */ - ext4_put_io_end(mpd.io_submit.io_end); + /* + * Drop our io_end reference we got from init. We have + * to be careful and use deferred io_end finishing if + * we are still holding the transaction as we can + * release the last reference to io_end which may end + * up doing unwritten extent conversion. + */ + if (handle) { + ext4_put_io_end_defer(mpd.io_submit.io_end); + ext4_journal_stop(handle); + } else + ext4_put_io_end(mpd.io_submit.io_end); if (ret == -ENOSPC && sbi->s_journal) { /* @@ -3587,7 +3616,36 @@ int ext4_can_truncate(struct inode *inode) } /* - * ext4_punch_hole: punches a hole in a file by releaseing the blocks + * We have to make sure i_disksize gets properly updated before we truncate + * page cache due to hole punching or zero range. Otherwise i_disksize update + * can get lost as it may have been postponed to submission of writeback but + * that will never happen after we truncate page cache. + */ +int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, + loff_t len) +{ + handle_t *handle; + loff_t size = i_size_read(inode); + + WARN_ON(!mutex_is_locked(&inode->i_mutex)); + if (offset > size || offset + len < size) + return 0; + + if (EXT4_I(inode)->i_disksize >= size) + return 0; + + handle = ext4_journal_start(inode, EXT4_HT_MISC, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ext4_update_i_disksize(inode, size); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + + return 0; +} + +/* + * ext4_punch_hole: punches a hole in a file by releasing the blocks * associated with the given offset and length * * @inode: File inode @@ -3616,7 +3674,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) * Write out all dirty pages to avoid race conditions * Then release them. */ - if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { + if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { ret = filemap_write_and_wait_range(mapping, offset, offset + length - 1); if (ret) @@ -3651,17 +3709,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } + /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + + /* + * Prevent page faults from reinstantiating pages we have released from + * page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); first_block_offset = round_up(offset, sb->s_blocksize); last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; /* Now release the pages and zero block aligned part of pages*/ - if (last_block_offset > first_block_offset) + if (last_block_offset > first_block_offset) { + ret = ext4_update_disksize_before_punch(inode, offset, length); + if (ret) + goto out_dio; truncate_pagecache_range(inode, first_block_offset, last_block_offset); - - /* Wait all existing dio workers, newcomers will block on i_mutex */ - ext4_inode_block_unlocked_dio(inode); - inode_dio_wait(inode); + } if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) credits = ext4_writepage_trans_blocks(inode); @@ -3708,16 +3775,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) if (IS_SYNC(inode)) ext4_handle_sync(handle); - /* Now release the pages again to reduce race window */ - if (last_block_offset > first_block_offset) - truncate_pagecache_range(inode, first_block_offset, - last_block_offset); - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); out_stop: ext4_journal_stop(handle); out_dio: + up_write(&EXT4_I(inode)->i_mmap_sem); ext4_inode_resume_unlocked_dio(inode); out_mutex: mutex_unlock(&inode->i_mutex); @@ -4112,6 +4175,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) struct inode *inode; journal_t *journal = EXT4_SB(sb)->s_journal; long ret; + loff_t size; int block; uid_t i_uid; gid_t i_gid; @@ -4203,6 +4267,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_file_acl |= ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; inode->i_size = ext4_isize(raw_inode); + if ((size = i_size_read(inode)) < 0) { + EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size); + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_disksize = inode->i_size; #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; @@ -4486,14 +4555,14 @@ static int ext4_do_update_inode(handle_t *handle, * Fix up interoperability with old kernels. Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ - if (!ei->i_dtime) { + if (ei->i_dtime && list_empty(&ei->i_orphan)) { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } else { raw_inode->i_uid_high = cpu_to_le16(high_16_bits(i_uid)); raw_inode->i_gid_high = cpu_to_le16(high_16_bits(i_gid)); - } else { - raw_inode->i_uid_high = 0; - raw_inode->i_gid_high = 0; } } else { raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid)); @@ -4851,6 +4920,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } else ext4_wait_for_tail_page_commit(inode); } + down_write(&EXT4_I(inode)->i_mmap_sem); /* * Truncate pagecache after we've waited for commit * in data=journal mode to make pages freeable. @@ -4858,6 +4928,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) truncate_pagecache(inode, inode->i_size); if (shrink) ext4_truncate(inode); + up_write(&EXT4_I(inode)->i_mmap_sem); } if (!rc) { @@ -5109,6 +5180,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) might_sleep(); trace_ext4_mark_inode_dirty(inode, _RET_IP_); err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + return err; if (ext4_handle_valid(handle) && EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { @@ -5125,8 +5198,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) sbi->s_want_extra_isize, iloc, handle); if (ret) { - ext4_set_inode_state(inode, - EXT4_STATE_NO_EXPAND); if (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count)) { ext4_warning(inode->i_sb, @@ -5139,9 +5210,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) } } } - if (!err) - err = ext4_mark_iloc_dirty(handle, inode, &iloc); - return err; + return ext4_mark_iloc_dirty(handle, inode, &iloc); } /* @@ -5306,6 +5375,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) sb_start_pagefault(inode->i_sb); file_update_time(vma->vm_file); + + down_read(&EXT4_I(inode)->i_mmap_sem); /* Delalloc case is easy... */ if (test_opt(inode->i_sb, DELALLOC) && !ext4_should_journal_data(inode) && @@ -5375,6 +5446,19 @@ retry_alloc: out_ret: ret = block_page_mkwrite_return(ret); out: + up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(inode->i_sb); return ret; } + +int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + int err; + + down_read(&EXT4_I(inode)->i_mmap_sem); + err = filemap_fault(vma, vmf); + up_read(&EXT4_I(inode)->i_mmap_sem); + + return err; +}