These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / fs / block_dev.c
index c7e4163..44d4a1e 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/device_cgroup.h>
 #include <linux/highmem.h>
 #include <linux/blkdev.h>
+#include <linux/backing-dev.h>
 #include <linux/module.h>
 #include <linux/blkpg.h>
 #include <linux/magic.h>
@@ -27,6 +28,7 @@
 #include <linux/namei.h>
 #include <linux/log2.h>
 #include <linux/cleancache.h>
+#include <linux/dax.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -42,18 +44,27 @@ static inline struct bdev_inode *BDEV_I(struct inode *inode)
        return container_of(inode, struct bdev_inode, vfs_inode);
 }
 
-inline struct block_device *I_BDEV(struct inode *inode)
+struct block_device *I_BDEV(struct inode *inode)
 {
        return &BDEV_I(inode)->bdev;
 }
 EXPORT_SYMBOL(I_BDEV);
 
-static void bdev_write_inode(struct inode *inode)
+static void bdev_write_inode(struct block_device *bdev)
 {
+       struct inode *inode = bdev->bd_inode;
+       int ret;
+
        spin_lock(&inode->i_lock);
        while (inode->i_state & I_DIRTY) {
                spin_unlock(&inode->i_lock);
-               WARN_ON_ONCE(write_inode_now(inode, true));
+               ret = write_inode_now(inode, true);
+               if (ret) {
+                       char name[BDEVNAME_SIZE];
+                       pr_warn_ratelimited("VFS: Dirty inode writeback failed "
+                                           "for block device %s (err=%d).\n",
+                                           bdevname(bdev, name), ret);
+               }
                spin_lock(&inode->i_lock);
        }
        spin_unlock(&inode->i_lock);
@@ -151,6 +162,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
 
+       if (IS_DAX(inode))
+               return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
+                               NULL, DIO_SKIP_DIO_COUNT);
        return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
                                    blkdev_get_block, NULL, NULL,
                                    DIO_SKIP_DIO_COUNT);
@@ -376,9 +390,17 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
                        struct page *page)
 {
        const struct block_device_operations *ops = bdev->bd_disk->fops;
-       if (!ops->rw_page)
-               return -EOPNOTSUPP;
-       return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+       int result = -EOPNOTSUPP;
+
+       if (!ops->rw_page || bdev_get_integrity(bdev))
+               return result;
+
+       result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+       if (result)
+               return result;
+       result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+       blk_queue_exit(bdev->bd_queue);
+       return result;
 }
 EXPORT_SYMBOL_GPL(bdev_read_page);
 
@@ -407,14 +429,20 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
        int result;
        int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
        const struct block_device_operations *ops = bdev->bd_disk->fops;
-       if (!ops->rw_page)
+
+       if (!ops->rw_page || bdev_get_integrity(bdev))
                return -EOPNOTSUPP;
+       result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+       if (result)
+               return result;
+
        set_page_writeback(page);
        result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
        if (result)
                end_page_writeback(page);
        else
                unlock_page(page);
+       blk_queue_exit(bdev->bd_queue);
        return result;
 }
 EXPORT_SYMBOL_GPL(bdev_write_page);
@@ -437,11 +465,17 @@ EXPORT_SYMBOL_GPL(bdev_write_page);
  * accessible at this address.
  */
 long bdev_direct_access(struct block_device *bdev, sector_t sector,
-                       void **addr, unsigned long *pfn, long size)
+                       void __pmem **addr, unsigned long *pfn, long size)
 {
        long avail;
        const struct block_device_operations *ops = bdev->bd_disk->fops;
 
+       /*
+        * The device driver is allowed to sleep, in order to make the
+        * memory directly accessible.
+        */
+       might_sleep();
+
        if (size < 0)
                return size;
        if (!ops->direct_access)
@@ -452,7 +486,7 @@ long bdev_direct_access(struct block_device *bdev, sector_t sector,
        sector += get_start_sect(bdev);
        if (sector % (PAGE_SIZE / 512))
                return -EINVAL;
-       avail = ops->direct_access(bdev, sector, addr, pfn, size);
+       avail = ops->direct_access(bdev, sector, addr, pfn);
        if (!avail)
                return -ERANGE;
        return min(avail, size);
@@ -546,7 +580,8 @@ static struct file_system_type bd_type = {
        .kill_sb        = kill_anon_super,
 };
 
-static struct super_block *blockdev_superblock __read_mostly;
+struct super_block *blockdev_superblock __read_mostly;
+EXPORT_SYMBOL_GPL(blockdev_superblock);
 
 void __init bdev_cache_init(void)
 {
@@ -687,11 +722,6 @@ static struct block_device *bd_acquire(struct inode *inode)
        return bdev;
 }
 
-int sb_is_blkdev_sb(struct super_block *sb)
-{
-       return sb == blockdev_superblock;
-}
-
 /* Call when you free inode */
 
 void bd_forget(struct inode *inode)
@@ -1068,7 +1098,7 @@ int revalidate_disk(struct gendisk *disk)
 
        if (disk->fops->revalidate_disk)
                ret = disk->fops->revalidate_disk(disk);
-
+       blk_integrity_revalidate(disk);
        bdev = bdget_disk(disk, 0);
        if (!bdev)
                return ret;
@@ -1173,6 +1203,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                bdev->bd_disk = disk;
                bdev->bd_queue = disk->queue;
                bdev->bd_contains = bdev;
+               bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
                if (!partno) {
                        ret = -ENXIO;
                        bdev->bd_part = disk_get_part(disk, partno);
@@ -1234,6 +1265,13 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                                goto out_clear;
                        }
                        bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
+                       /*
+                        * If the partition is not aligned on a page
+                        * boundary, we can't do dax I/O to it.
+                        */
+                       if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
+                           (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+                               bdev->bd_inode->i_flags &= ~S_DAX;
                }
        } else {
                if (bdev->bd_contains == bdev) {
@@ -1485,11 +1523,14 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
                WARN_ON_ONCE(bdev->bd_holders);
                sync_blockdev(bdev);
                kill_bdev(bdev);
+
+               bdev_write_inode(bdev);
                /*
-                * ->release can cause the queue to disappear, so flush all
-                * dirty data before.
+                * Detaching bdev inode from its wb in __destroy_inode()
+                * is too late: the queue which embeds its bdi (along with
+                * root wb) can be gone as soon as we put_disk() below.
                 */
-               bdev_write_inode(bdev->bd_inode);
+               inode_detach_wb(bdev->bd_inode);
        }
        if (bdev->bd_contains == bdev) {
                if (disk->fops->release)
@@ -1762,7 +1803,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 {
        struct inode *inode, *old_inode = NULL;
 
-       spin_lock(&inode_sb_list_lock);
+       spin_lock(&blockdev_superblock->s_inode_list_lock);
        list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
                struct address_space *mapping = inode->i_mapping;
 
@@ -1774,13 +1815,13 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
                }
                __iget(inode);
                spin_unlock(&inode->i_lock);
-               spin_unlock(&inode_sb_list_lock);
+               spin_unlock(&blockdev_superblock->s_inode_list_lock);
                /*
                 * We hold a reference to 'inode' so it couldn't have been
                 * removed from s_inodes list while we dropped the
-                * inode_sb_list_lock.  We cannot iput the inode now as we can
+                * s_inode_list_lock  We cannot iput the inode now as we can
                 * be holding the last reference and we cannot iput it under
-                * inode_sb_list_lock. So we keep the reference and iput it
+                * s_inode_list_lock. So we keep the reference and iput it
                 * later.
                 */
                iput(old_inode);
@@ -1788,8 +1829,8 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 
                func(I_BDEV(inode), arg);
 
-               spin_lock(&inode_sb_list_lock);
+               spin_lock(&blockdev_superblock->s_inode_list_lock);
        }
-       spin_unlock(&inode_sb_list_lock);
+       spin_unlock(&blockdev_superblock->s_inode_list_lock);
        iput(old_inode);
 }