These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
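
The hunks below do two things. First, follow_pfn_pte() teaches get_user_pages() to cope with pte-mapped pfn ranges that have no struct page behind them: when the caller did not ask for FOLL_GET, such an entry now yields -EEXIST (and the walk simply moves on to the next page) instead of failing outright with -EFAULT. Second, FOLL_MLOCK is split back out of FOLL_POPULATE so that populate_vma_page_range() can mlock only the pages that are already present in a VM_LOCKONFAULT vma rather than faulting the whole range in up front.

The program below is not part of the patch; it is a minimal userspace sketch of the lock-on-fault behaviour that the FOLL_MLOCK/VM_LOCKONFAULT hunks service, assuming a 4.4-or-newer kernel and headers that provide __NR_mlock2 and MLOCK_ONFAULT.

/*
 * lock_on_fault_demo.c - hypothetical example, not part of this patch.
 * Locks an anonymous mapping with MLOCK_ONFAULT (no up-front populate),
 * touches one page, then upgrades to a plain mlock(), printing how many
 * pages are resident after each step. Uses the raw mlock2 syscall so it
 * does not depend on a glibc wrapper.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>

#ifndef __NR_mlock2
#error "this sketch assumes kernel headers that define __NR_mlock2 (Linux >= 4.4)"
#endif

#ifndef MLOCK_ONFAULT
#define MLOCK_ONFAULT 0x01	/* value from include/uapi/asm-generic/mman-common.h */
#endif

static int resident_pages(char *addr, size_t len, long page_size)
{
	size_t pages = (len + page_size - 1) / page_size;
	unsigned char vec[pages];
	int n = 0;

	if (mincore(addr, len, vec) != 0)
		return -1;
	for (size_t i = 0; i < pages; i++)
		n += vec[i] & 1;
	return n;
}

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	size_t len = 16 * page_size;
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED)
		return 1;

	/* Lock the vma but do not populate it (the VM_LOCKONFAULT path). */
	if (syscall(__NR_mlock2, buf, len, MLOCK_ONFAULT) != 0) {
		perror("mlock2");
		return 1;
	}
	printf("resident after MLOCK_ONFAULT:  %d pages\n",
	       resident_pages(buf, len, page_size));

	/* A touched page is faulted in and mlocked at fault time. */
	buf[0] = 1;
	printf("resident after touching page:  %d pages\n",
	       resident_pages(buf, len, page_size));

	/* Plain mlock() takes the FOLL_POPULATE path and faults everything in. */
	if (mlock(buf, len) != 0) {
		perror("mlock");
		return 1;
	}
	printf("resident after mlock():        %d pages\n",
	       resident_pages(buf, len, page_size));

	munmap(buf, len);
	return 0;
}

On such a kernel the three counts should come out as 0, 1 and 16 pages: MLOCK_ONFAULT populates nothing up front, each fault mlocks one page as it appears, and plain mlock() faults the whole range in.
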
[kvmfornfv.git] / kernel / mm / gup.c
index 6297f6b..deafa2c 100644
@@ -12,7 +12,9 @@
 #include <linux/sched.h>
 #include <linux/rwsem.h>
 #include <linux/hugetlb.h>
+
 #include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 
 #include "internal.h"
 
@@ -32,6 +34,30 @@ static struct page *no_page_table(struct vm_area_struct *vma,
        return NULL;
 }
 
+static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
+               pte_t *pte, unsigned int flags)
+{
+       /* No page to get reference */
+       if (flags & FOLL_GET)
+               return -EFAULT;
+
+       if (flags & FOLL_TOUCH) {
+               pte_t entry = *pte;
+
+               if (flags & FOLL_WRITE)
+                       entry = pte_mkdirty(entry);
+               entry = pte_mkyoung(entry);
+
+               if (!pte_same(*pte, entry)) {
+                       set_pte_at(vma->vm_mm, address, pte, entry);
+                       update_mmu_cache(vma, address, pte);
+               }
+       }
+
+       /* Proper page table entry exists, but no corresponding struct page */
+       return -EEXIST;
+}
+
 static struct page *follow_page_pte(struct vm_area_struct *vma,
                unsigned long address, pmd_t *pmd, unsigned int flags)
 {
@@ -73,10 +99,21 @@ retry:
 
        page = vm_normal_page(vma, address, pte);
        if (unlikely(!page)) {
-               if ((flags & FOLL_DUMP) ||
-                   !is_zero_pfn(pte_pfn(pte)))
-                       goto bad_page;
-               page = pte_page(pte);
+               if (flags & FOLL_DUMP) {
+                       /* Avoid special (like zero) pages in core dumps */
+                       page = ERR_PTR(-EFAULT);
+                       goto out;
+               }
+
+               if (is_zero_pfn(pte_pfn(pte))) {
+                       page = pte_page(pte);
+               } else {
+                       int ret;
+
+                       ret = follow_pfn_pte(vma, address, ptep, flags);
+                       page = ERR_PTR(ret);
+                       goto out;
+               }
        }
 
        if (flags & FOLL_GET)
@@ -92,7 +129,7 @@ retry:
                 */
                mark_page_accessed(page);
        }
-       if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) {
+       if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
                /*
                 * The preliminary mapping check is mainly to avoid the
                 * pointless overhead of lock_page on the ZERO_PAGE
@@ -114,12 +151,9 @@ retry:
                        unlock_page(page);
                }
        }
+out:
        pte_unmap_unlock(ptep, ptl);
        return page;
-bad_page:
-       pte_unmap_unlock(ptep, ptl);
-       return ERR_PTR(-EFAULT);
-
 no_page:
        pte_unmap_unlock(ptep, ptl);
        if (!pte_none(pte))
@@ -265,6 +299,9 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
        unsigned int fault_flags = 0;
        int ret;
 
+       /* mlock all present pages, but do not fault in new pages */
+       if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
+               return -ENOENT;
        /* For mm_populate(), just skip the stack guard page. */
        if ((*flags & FOLL_POPULATE) &&
                        (stack_guard_page_start(vma, address) ||
@@ -489,9 +526,15 @@ retry:
                                goto next_page;
                        }
                        BUG();
-               }
-               if (IS_ERR(page))
+               } else if (PTR_ERR(page) == -EEXIST) {
+                       /*
+                        * Proper page table entry exists, but no corresponding
+                        * struct page.
+                        */
+                       goto next_page;
+               } else if (IS_ERR(page)) {
                        return i ? i : PTR_ERR(page);
+               }
                if (pages) {
                        pages[i] = page;
                        flush_anon_page(vma, page, start);
@@ -850,7 +893,10 @@ long populate_vma_page_range(struct vm_area_struct *vma,
        VM_BUG_ON_VMA(end   > vma->vm_end, vma);
        VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
 
-       gup_flags = FOLL_TOUCH | FOLL_POPULATE;
+       gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
+       if (vma->vm_flags & VM_LOCKONFAULT)
+               gup_flags &= ~FOLL_POPULATE;
+
        /*
         * We want to touch writable mappings with a write fault in order
         * to break COW, except for shared mappings because these don't COW