These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / fs / ocfs2 / aops.c
index 9ea7012..e6795c7 100644 (file)
@@ -227,7 +227,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
 
        if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) {
-               ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag",
+               ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag\n",
                            (unsigned long long)OCFS2_I(inode)->ip_blkno);
                return -EROFS;
        }
@@ -237,7 +237,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
        if (size > PAGE_CACHE_SIZE ||
            size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) {
                ocfs2_error(inode->i_sb,
-                           "Inode %llu has with inline data has bad size: %Lu",
+                           "Inode %llu has with inline data has bad size: %Lu\n",
                            (unsigned long long)OCFS2_I(inode)->ip_blkno,
                            (unsigned long long)size);
                return -EROFS;
@@ -523,7 +523,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
        unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
        unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
        unsigned long len = bh_result->b_size;
-       unsigned int clusters_to_alloc = 0;
+       unsigned int clusters_to_alloc = 0, contig_clusters = 0;
 
        cpos = ocfs2_blocks_to_clusters(inode->i_sb, iblock);
 
@@ -533,10 +533,14 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 
        inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
 
+       down_read(&OCFS2_I(inode)->ip_alloc_sem);
+
        /* This figures out the size of the next contiguous block, and
         * our logical offset */
        ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
                                          &contig_blocks, &ext_flags);
+       up_read(&OCFS2_I(inode)->ip_alloc_sem);
+
        if (ret) {
                mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
                     (unsigned long long)iblock);
@@ -557,16 +561,21 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 
                alloc_locked = 1;
 
+               down_write(&OCFS2_I(inode)->ip_alloc_sem);
+
                /* fill hole, allocate blocks can't be larger than the size
                 * of the hole */
                clusters_to_alloc = ocfs2_clusters_for_bytes(inode->i_sb, len);
-               if (clusters_to_alloc > contig_blocks)
-                       clusters_to_alloc = contig_blocks;
+               contig_clusters = ocfs2_clusters_for_blocks(inode->i_sb,
+                               contig_blocks);
+               if (clusters_to_alloc > contig_clusters)
+                       clusters_to_alloc = contig_clusters;
 
                /* allocate extent and insert them into the extent tree */
                ret = ocfs2_extend_allocation(inode, cpos,
                                clusters_to_alloc, 0);
                if (ret < 0) {
+                       up_write(&OCFS2_I(inode)->ip_alloc_sem);
                        mlog_errno(ret);
                        goto bail;
                }
@@ -574,11 +583,14 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
                ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
                                &contig_blocks, &ext_flags);
                if (ret < 0) {
+                       up_write(&OCFS2_I(inode)->ip_alloc_sem);
                        mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
                                        (unsigned long long)iblock);
                        ret = -EIO;
                        goto bail;
                }
+               set_buffer_new(bh_result);
+               up_write(&OCFS2_I(inode)->ip_alloc_sem);
        }
 
        /*
@@ -619,19 +631,19 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
        /* this io's submitter should not have unlocked this before we could */
        BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
 
-       if (ocfs2_iocb_is_sem_locked(iocb))
-               ocfs2_iocb_clear_sem_locked(iocb);
-
        if (ocfs2_iocb_is_unaligned_aio(iocb)) {
                ocfs2_iocb_clear_unaligned_aio(iocb);
 
                mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
        }
 
-       ocfs2_iocb_clear_rw_locked(iocb);
+       /* Let rw unlock to be done later to protect append direct io write */
+       if (offset + bytes <= i_size_read(inode)) {
+               ocfs2_iocb_clear_rw_locked(iocb);
 
-       level = ocfs2_iocb_rw_locked_level(iocb);
-       ocfs2_rw_unlock(inode, level);
+               level = ocfs2_iocb_rw_locked_level(iocb);
+               ocfs2_rw_unlock(inode, level);
+       }
 }
 
 static int ocfs2_releasepage(struct page *page, gfp_t wait)
@@ -833,12 +845,17 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
 
                /* zeroing out the previously allocated cluster tail
                 * that but not zeroed */
-               if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+               if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
+                       down_read(&OCFS2_I(inode)->ip_alloc_sem);
                        ret = ocfs2_direct_IO_zero_extend(osb, inode, offset,
                                        zero_len_tail, cluster_align_tail);
-               else
+                       up_read(&OCFS2_I(inode)->ip_alloc_sem);
+               } else {
+                       down_write(&OCFS2_I(inode)->ip_alloc_sem);
                        ret = ocfs2_direct_IO_extend_no_holes(osb, inode,
                                        offset);
+                       up_write(&OCFS2_I(inode)->ip_alloc_sem);
+               }
                if (ret < 0) {
                        mlog_errno(ret);
                        ocfs2_inode_unlock(inode, 1);
@@ -848,6 +865,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
                is_overwrite = ocfs2_is_overwrite(osb, inode, offset);
                if (is_overwrite < 0) {
                        mlog_errno(is_overwrite);
+                       ret = is_overwrite;
                        ocfs2_inode_unlock(inode, 1);
                        goto clean_orphan;
                }
@@ -858,7 +876,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
        written = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
                                       offset, ocfs2_direct_IO_get_blocks,
                                       ocfs2_dio_end_io, NULL, 0);
-       if (unlikely(written < 0)) {
+       /* overwrite aio may return -EIOCBQUEUED, and it is not an error */
+       if ((written < 0) && (written != -EIOCBQUEUED)) {
                loff_t i_size = i_size_read(inode);
 
                if (offset + count > i_size) {
@@ -877,12 +896,14 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
 
                                        ocfs2_inode_unlock(inode, 1);
                                        brelse(di_bh);
+                                       di_bh = NULL;
                                        goto clean_orphan;
                                }
                        }
 
                        ocfs2_inode_unlock(inode, 1);
                        brelse(di_bh);
+                       di_bh = NULL;
 
                        ret = jbd2_journal_force_commit(journal);
                        if (ret < 0)
@@ -925,13 +946,26 @@ clean_orphan:
                int update_isize = written > 0 ? 1 : 0;
                loff_t end = update_isize ? offset + written : 0;
 
-               tmp_ret = ocfs2_del_inode_from_orphan(osb, inode,
+               tmp_ret = ocfs2_inode_lock(inode, &di_bh, 1);
+               if (tmp_ret < 0) {
+                       ret = tmp_ret;
+                       mlog_errno(ret);
+                       goto out;
+               }
+
+               tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
                                update_isize, end);
                if (tmp_ret < 0) {
+                       ocfs2_inode_unlock(inode, 1);
                        ret = tmp_ret;
+                       mlog_errno(ret);
+                       brelse(di_bh);
                        goto out;
                }
 
+               ocfs2_inode_unlock(inode, 1);
+               brelse(di_bh);
+
                tmp_ret = jbd2_journal_force_commit(journal);
                if (tmp_ret < 0) {
                        ret = tmp_ret;
@@ -2176,10 +2210,7 @@ try_again:
                if (ret)
                        goto out_commit;
        }
-       /*
-        * We don't want this to fail in ocfs2_write_end(), so do it
-        * here.
-        */
+
        ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
                                      OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
@@ -2336,7 +2367,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
                           loff_t pos, unsigned len, unsigned copied,
                           struct page *page, void *fsdata)
 {
-       int i;
+       int i, ret;
        unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);
        struct inode *inode = mapping->host;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -2345,6 +2376,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
        handle_t *handle = wc->w_handle;
        struct page *tmppage;
 
+       ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
+                       OCFS2_JOURNAL_ACCESS_WRITE);
+       if (ret) {
+               copied = ret;
+               mlog_errno(ret);
+               goto out;
+       }
+
        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
                ocfs2_write_end_inline(inode, pos, len, &copied, di, wc);
                goto out_write_size;
@@ -2400,6 +2439,7 @@ out_write_size:
        ocfs2_update_inode_fsync_trans(handle, inode, 1);
        ocfs2_journal_dirty(handle, wc->w_di_bh);
 
+out:
        /* unlock pages before dealloc since it needs acquiring j_trans_barrier
         * lock, or it will cause a deadlock since journal commit threads holds
         * this lock and will ask for the page lock when flushing the data.