These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / mm / mmap.c
index bb50cac..455772a 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/notifier.h>
 #include <linux/memory.h>
 #include <linux/printk.h>
+#include <linux/userfaultfd_k.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -440,12 +441,16 @@ static void validate_mm(struct mm_struct *mm)
        struct vm_area_struct *vma = mm->mmap;
 
        while (vma) {
+               struct anon_vma *anon_vma = vma->anon_vma;
                struct anon_vma_chain *avc;
 
-               vma_lock_anon_vma(vma);
-               list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-                       anon_vma_interval_tree_verify(avc);
-               vma_unlock_anon_vma(vma);
+               if (anon_vma) {
+                       anon_vma_lock_read(anon_vma);
+                       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                               anon_vma_interval_tree_verify(avc);
+                       anon_vma_unlock_read(anon_vma);
+               }
+
                highest_address = vma->vm_end;
                vma = vma->vm_next;
                i++;
@@ -919,7 +924,8 @@ again:                      remove_next = 1 + (end > next->vm_end);
  * per-vma resources, so we don't attempt to merge those.
  */
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
-                       struct file *file, unsigned long vm_flags)
+                               struct file *file, unsigned long vm_flags,
+                               struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
 {
        /*
         * VM_SOFTDIRTY should not prevent from VMA merging, if we
@@ -935,6 +941,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
                return 0;
        if (vma->vm_ops && vma->vm_ops->close)
                return 0;
+       if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
+               return 0;
        return 1;
 }
 
@@ -965,9 +973,11 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
  */
 static int
 can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
-       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+                    struct anon_vma *anon_vma, struct file *file,
+                    pgoff_t vm_pgoff,
+                    struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
 {
-       if (is_mergeable_vma(vma, file, vm_flags) &&
+       if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
            is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                if (vma->vm_pgoff == vm_pgoff)
                        return 1;
@@ -984,9 +994,11 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
  */
 static int
 can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
-       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+                   struct anon_vma *anon_vma, struct file *file,
+                   pgoff_t vm_pgoff,
+                   struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
 {
-       if (is_mergeable_vma(vma, file, vm_flags) &&
+       if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
            is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                pgoff_t vm_pglen;
                vm_pglen = vma_pages(vma);
@@ -1029,7 +1041,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                        struct vm_area_struct *prev, unsigned long addr,
                        unsigned long end, unsigned long vm_flags,
                        struct anon_vma *anon_vma, struct file *file,
-                       pgoff_t pgoff, struct mempolicy *policy)
+                       pgoff_t pgoff, struct mempolicy *policy,
+                       struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
 {
        pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
        struct vm_area_struct *area, *next;
@@ -1056,14 +1069,17 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
        if (prev && prev->vm_end == addr &&
                        mpol_equal(vma_policy(prev), policy) &&
                        can_vma_merge_after(prev, vm_flags,
-                                               anon_vma, file, pgoff)) {
+                                           anon_vma, file, pgoff,
+                                           vm_userfaultfd_ctx)) {
                /*
                 * OK, it can.  Can we now merge in the successor as well?
                 */
                if (next && end == next->vm_start &&
                                mpol_equal(policy, vma_policy(next)) &&
                                can_vma_merge_before(next, vm_flags,
-                                       anon_vma, file, pgoff+pglen) &&
+                                                    anon_vma, file,
+                                                    pgoff+pglen,
+                                                    vm_userfaultfd_ctx) &&
                                is_mergeable_anon_vma(prev->anon_vma,
                                                      next->anon_vma, NULL)) {
                                                        /* cases 1, 6 */
@@ -1084,7 +1100,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
        if (next && end == next->vm_start &&
                        mpol_equal(policy, vma_policy(next)) &&
                        can_vma_merge_before(next, vm_flags,
-                                       anon_vma, file, pgoff+pglen)) {
+                                            anon_vma, file, pgoff+pglen,
+                                            vm_userfaultfd_ctx)) {
                if (prev && addr < prev->vm_end)        /* case 4 */
                        err = vma_adjust(prev, prev->vm_start,
                                addr, prev->vm_pgoff, NULL);
@@ -1247,17 +1264,18 @@ static inline int mlock_future_check(struct mm_struct *mm,
 /*
  * The caller must hold down_write(&current->mm->mmap_sem).
  */
-
-unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+unsigned long do_mmap(struct file *file, unsigned long addr,
                        unsigned long len, unsigned long prot,
-                       unsigned long flags, unsigned long pgoff,
-                       unsigned long *populate)
+                       unsigned long flags, vm_flags_t vm_flags,
+                       unsigned long pgoff, unsigned long *populate)
 {
        struct mm_struct *mm = current->mm;
-       vm_flags_t vm_flags;
 
        *populate = 0;
 
+       if (!len)
+               return -EINVAL;
+
        /*
         * Does the application expect PROT_READ to imply PROT_EXEC?
         *
@@ -1265,12 +1283,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
         *  mounted, in which case we dont add PROT_EXEC.)
         */
        if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
-               if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
+               if (!(file && path_noexec(&file->f_path)))
                        prot |= PROT_EXEC;
 
-       if (!len)
-               return -EINVAL;
-
        if (!(flags & MAP_FIXED))
                addr = round_hint_to_min(addr);
 
@@ -1291,14 +1306,14 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
         * that it represents a valid section of the address space.
         */
        addr = get_unmapped_area(file, addr, len, pgoff, flags);
-       if (addr & ~PAGE_MASK)
+       if (offset_in_page(addr))
                return addr;
 
        /* Do simple checking here so the lower-level routines won't have
         * to. we assume access permissions have been handled by the open
         * of the memory object, so we don't do any here.
         */
-       vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
+       vm_flags |= calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
                        mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
        if (flags & MAP_LOCKED)
@@ -1337,7 +1352,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                case MAP_PRIVATE:
                        if (!(file->f_mode & FMODE_READ))
                                return -EACCES;
-                       if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
+                       if (path_noexec(&file->f_path)) {
                                if (vm_flags & VM_EXEC)
                                        return -EPERM;
                                vm_flags &= ~VM_MAYEXEC;
@@ -1401,13 +1416,13 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
                unsigned long, fd, unsigned long, pgoff)
 {
        struct file *file = NULL;
-       unsigned long retval = -EBADF;
+       unsigned long retval;
 
        if (!(flags & MAP_ANONYMOUS)) {
                audit_mmap_fd(fd, flags);
                file = fget(fd);
                if (!file)
-                       goto out;
+                       return -EBADF;
                if (is_file_hugepages(file))
                        len = ALIGN(len, huge_page_size(hstate_file(file)));
                retval = -EINVAL;
@@ -1442,7 +1457,6 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
 out_fput:
        if (file)
                fput(file);
-out:
        return retval;
 }
 
@@ -1462,7 +1476,7 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
 
        if (copy_from_user(&a, arg, sizeof(a)))
                return -EFAULT;
-       if (a.offset & ~PAGE_MASK)
+       if (offset_in_page(a.offset))
                return -EINVAL;
 
        return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
@@ -1479,13 +1493,14 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
 int vma_wants_writenotify(struct vm_area_struct *vma)
 {
        vm_flags_t vm_flags = vma->vm_flags;
+       const struct vm_operations_struct *vm_ops = vma->vm_ops;
 
        /* If it was private or non-writable, the write bit is already clear */
        if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
                return 0;
 
        /* The backer wishes to know when pages are first written to? */
-       if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+       if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
                return 1;
 
        /* The open routine did something to the protections that pgprot_modify
@@ -1550,7 +1565,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
        }
 
        /* Clear old maps */
-       error = -ENOMEM;
        while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
                              &rb_parent)) {
                if (do_munmap(mm, addr, len))
@@ -1570,8 +1584,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
        /*
         * Can we just expand an old mapping?
         */
-       vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff,
-                       NULL);
+       vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
+                       NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
        if (vma)
                goto out;
 
@@ -1651,7 +1665,7 @@ out:
                                        vma == get_gate_vma(current->mm)))
                        mm->locked_vm += (len >> PAGE_SHIFT);
                else
-                       vma->vm_flags &= ~VM_LOCKED;
+                       vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
        }
 
        if (file)
@@ -1977,7 +1991,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
         * can happen with large stack limits and large mmap()
         * allocations.
         */
-       if (addr & ~PAGE_MASK) {
+       if (offset_in_page(addr)) {
                VM_BUG_ON(addr != -ENOMEM);
                info.flags = 0;
                info.low_limit = TASK_UNMAPPED_BASE;
@@ -2013,7 +2027,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 
        if (addr > TASK_SIZE - len)
                return -ENOMEM;
-       if (addr & ~PAGE_MASK)
+       if (offset_in_page(addr))
                return -EINVAL;
 
        addr = arch_rebalance_pgtables(addr, len);
@@ -2035,7 +2049,6 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
                return vma;
 
        rb_node = mm->mm_rb.rb_node;
-       vma = NULL;
 
        while (rb_node) {
                struct vm_area_struct *tmp;
@@ -2127,10 +2140,6 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
        if (security_vm_enough_memory_mm(mm, grow))
                return -ENOMEM;
 
-       /* Ok, everything looks good - let it rip */
-       if (vma->vm_flags & VM_LOCKED)
-               mm->locked_vm += grow;
-       vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
        return 0;
 }
 
@@ -2141,32 +2150,28 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
  */
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
-       int error;
+       struct mm_struct *mm = vma->vm_mm;
+       int error = 0;
 
        if (!(vma->vm_flags & VM_GROWSUP))
                return -EFAULT;
 
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
+       /* Guard against wrapping around to address 0. */
+       if (address < PAGE_ALIGN(address+4))
+               address = PAGE_ALIGN(address+4);
+       else
+               return -ENOMEM;
+
+       /* We must make sure the anon_vma is allocated. */
        if (unlikely(anon_vma_prepare(vma)))
                return -ENOMEM;
-       vma_lock_anon_vma(vma);
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
         * is required to hold the mmap_sem in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
-        * Also guard against wrapping around to address 0.
         */
-       if (address < PAGE_ALIGN(address+4))
-               address = PAGE_ALIGN(address+4);
-       else {
-               vma_unlock_anon_vma(vma);
-               return -ENOMEM;
-       }
-       error = 0;
+       anon_vma_lock_write(vma->anon_vma);
 
        /* Somebody else might have raced and expanded it already */
        if (address > vma->vm_end) {
@@ -2184,29 +2189,33 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                 * updates, but we only hold a shared mmap_sem
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                 * we don't guarantee that all growable vmas
                                 * in a mm share the same root anon vma.
                                 * So, we reuse mm->page_table_lock to guard
                                 * against concurrent vma expansions.
                                 */
-                               spin_lock(&vma->vm_mm->page_table_lock);
+                               spin_lock(&mm->page_table_lock);
+                               if (vma->vm_flags & VM_LOCKED)
+                                       mm->locked_vm += grow;
+                               vm_stat_account(mm, vma->vm_flags,
+                                               vma->vm_file, grow);
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_end = address;
                                anon_vma_interval_tree_post_update_vma(vma);
                                if (vma->vm_next)
                                        vma_gap_update(vma->vm_next);
                                else
-                                       vma->vm_mm->highest_vm_end = address;
-                               spin_unlock(&vma->vm_mm->page_table_lock);
+                                       mm->highest_vm_end = address;
+                               spin_unlock(&mm->page_table_lock);
 
                                perf_event_mmap(vma);
                        }
                }
        }
-       vma_unlock_anon_vma(vma);
+       anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma_merge(vma, vma->vm_flags);
-       validate_mm(vma->vm_mm);
+       validate_mm(mm);
        return error;
 }
 #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -2217,27 +2226,24 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 int expand_downwards(struct vm_area_struct *vma,
                                   unsigned long address)
 {
+       struct mm_struct *mm = vma->vm_mm;
        int error;
 
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
-       if (unlikely(anon_vma_prepare(vma)))
-               return -ENOMEM;
-
        address &= PAGE_MASK;
        error = security_mmap_addr(address);
        if (error)
                return error;
 
-       vma_lock_anon_vma(vma);
+       /* We must make sure the anon_vma is allocated. */
+       if (unlikely(anon_vma_prepare(vma)))
+               return -ENOMEM;
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
         * is required to hold the mmap_sem in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
         */
+       anon_vma_lock_write(vma->anon_vma);
 
        /* Somebody else might have raced and expanded it already */
        if (address < vma->vm_start) {
@@ -2255,27 +2261,31 @@ int expand_downwards(struct vm_area_struct *vma,
                                 * updates, but we only hold a shared mmap_sem
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                 * we don't guarantee that all growable vmas
                                 * in a mm share the same root anon vma.
                                 * So, we reuse mm->page_table_lock to guard
                                 * against concurrent vma expansions.
                                 */
-                               spin_lock(&vma->vm_mm->page_table_lock);
+                               spin_lock(&mm->page_table_lock);
+                               if (vma->vm_flags & VM_LOCKED)
+                                       mm->locked_vm += grow;
+                               vm_stat_account(mm, vma->vm_flags,
+                                               vma->vm_file, grow);
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_start = address;
                                vma->vm_pgoff -= grow;
                                anon_vma_interval_tree_post_update_vma(vma);
                                vma_gap_update(vma);
-                               spin_unlock(&vma->vm_mm->page_table_lock);
+                               spin_unlock(&mm->page_table_lock);
 
                                perf_event_mmap(vma);
                        }
                }
        }
-       vma_unlock_anon_vma(vma);
+       anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma_merge(vma, vma->vm_flags);
-       validate_mm(vma->vm_mm);
+       validate_mm(mm);
        return error;
 }
 
@@ -2442,7 +2452,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
              unsigned long addr, int new_below)
 {
        struct vm_area_struct *new;
-       int err = -ENOMEM;
+       int err;
 
        if (is_vm_hugetlb_page(vma) && (addr &
                                        ~(huge_page_mask(hstate_vma(vma)))))
@@ -2450,7 +2460,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 
        new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
        if (!new)
-               goto out_err;
+               return -ENOMEM;
 
        /* most fields are the same, copy all, and then fixup */
        *new = *vma;
@@ -2498,7 +2508,6 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
        mpol_put(vma_policy(new));
  out_free_vma:
        kmem_cache_free(vm_area_cachep, new);
- out_err:
        return err;
 }
 
@@ -2525,7 +2534,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
        unsigned long end;
        struct vm_area_struct *vma, *prev, *last;
 
-       if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
+       if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
                return -EINVAL;
 
        len = PAGE_ALIGN(len);
@@ -2659,12 +2668,29 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
        if (!vma || !(vma->vm_flags & VM_SHARED))
                goto out;
 
-       if (start < vma->vm_start || start + size > vma->vm_end)
+       if (start < vma->vm_start)
                goto out;
 
-       if (pgoff == linear_page_index(vma, start)) {
-               ret = 0;
-               goto out;
+       if (start + size > vma->vm_end) {
+               struct vm_area_struct *next;
+
+               for (next = vma->vm_next; next; next = next->vm_next) {
+                       /* hole between vmas ? */
+                       if (next->vm_start != next->vm_prev->vm_end)
+                               goto out;
+
+                       if (next->vm_file != vma->vm_file)
+                               goto out;
+
+                       if (next->vm_flags != vma->vm_flags)
+                               goto out;
+
+                       if (start + size <= next->vm_end)
+                               break;
+               }
+
+               if (!next)
+                       goto out;
        }
 
        prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
@@ -2674,9 +2700,16 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
        flags &= MAP_NONBLOCK;
        flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
        if (vma->vm_flags & VM_LOCKED) {
+               struct vm_area_struct *tmp;
                flags |= MAP_LOCKED;
+
                /* drop PG_Mlocked flag for over-mapped range */
-               munlock_vma_pages_range(vma, start, start + size);
+               /* walk every vma overlapping [start, start + size) */
+               for (tmp = vma; tmp && tmp->vm_start < start + size;
+                               tmp = tmp->vm_next) {
+                       munlock_vma_pages_range(tmp, max(tmp->vm_start, start),
+                                       min(tmp->vm_end, start + size));
+               }
        }
 
        file = get_file(vma->vm_file);
@@ -2723,7 +2756,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
        flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
        error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
-       if (error & ~PAGE_MASK)
+       if (offset_in_page(error))
                return error;
 
        error = mlock_future_check(mm, mm->def_flags, len);
@@ -2757,7 +2790,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
 
        /* Can we just expand an old private anonymous mapping? */
        vma = vma_merge(mm, prev, addr, addr + len, flags,
-                                       NULL, NULL, pgoff, NULL);
+                       NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
        if (vma)
                goto out;
 
@@ -2859,6 +2892,13 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
        struct vm_area_struct *prev;
        struct rb_node **rb_link, *rb_parent;
 
+       if (find_vma_links(mm, vma->vm_start, vma->vm_end,
+                          &prev, &rb_link, &rb_parent))
+               return -ENOMEM;
+       if ((vma->vm_flags & VM_ACCOUNT) &&
+            security_vm_enough_memory_mm(mm, vma_pages(vma)))
+               return -ENOMEM;
+
        /*
         * The vm_pgoff of a purely anonymous vma should be irrelevant
         * until its first write fault, when page's anon_vma and index
@@ -2871,16 +2911,10 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
         * using the existing file pgoff checks and manipulations.
         * Similarly in do_mmap_pgoff and in do_brk.
         */
-       if (!vma->vm_file) {
+       if (vma_is_anonymous(vma)) {
                BUG_ON(vma->anon_vma);
                vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
        }
-       if (find_vma_links(mm, vma->vm_start, vma->vm_end,
-                          &prev, &rb_link, &rb_parent))
-               return -ENOMEM;
-       if ((vma->vm_flags & VM_ACCOUNT) &&
-            security_vm_enough_memory_mm(mm, vma_pages(vma)))
-               return -ENOMEM;
 
        vma_link(mm, vma, prev, rb_link, rb_parent);
        return 0;
@@ -2905,7 +2939,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
         * If anonymous vma has not yet been faulted, update new pgoff
         * to match new location, to increase its chance of merging.
         */
-       if (unlikely(!vma->vm_file && !vma->anon_vma)) {
+       if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
                pgoff = addr >> PAGE_SHIFT;
                faulted_in_anon_vma = false;
        }
@@ -2913,7 +2947,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
                return NULL;    /* should never get here */
        new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
-                       vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+                           vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+                           vma->vm_userfaultfd_ctx);
        if (new_vma) {
                /*
                 * Source vma may have been merged into new_vma
@@ -2938,30 +2973,31 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
        } else {
                new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-               if (new_vma) {
-                       *new_vma = *vma;
-                       new_vma->vm_start = addr;
-                       new_vma->vm_end = addr + len;
-                       new_vma->vm_pgoff = pgoff;
-                       if (vma_dup_policy(vma, new_vma))
-                               goto out_free_vma;
-                       INIT_LIST_HEAD(&new_vma->anon_vma_chain);
-                       if (anon_vma_clone(new_vma, vma))
-                               goto out_free_mempol;
-                       if (new_vma->vm_file)
-                               get_file(new_vma->vm_file);
-                       if (new_vma->vm_ops && new_vma->vm_ops->open)
-                               new_vma->vm_ops->open(new_vma);
-                       vma_link(mm, new_vma, prev, rb_link, rb_parent);
-                       *need_rmap_locks = false;
-               }
+               if (!new_vma)
+                       goto out;
+               *new_vma = *vma;
+               new_vma->vm_start = addr;
+               new_vma->vm_end = addr + len;
+               new_vma->vm_pgoff = pgoff;
+               if (vma_dup_policy(vma, new_vma))
+                       goto out_free_vma;
+               INIT_LIST_HEAD(&new_vma->anon_vma_chain);
+               if (anon_vma_clone(new_vma, vma))
+                       goto out_free_mempol;
+               if (new_vma->vm_file)
+                       get_file(new_vma->vm_file);
+               if (new_vma->vm_ops && new_vma->vm_ops->open)
+                       new_vma->vm_ops->open(new_vma);
+               vma_link(mm, new_vma, prev, rb_link, rb_parent);
+               *need_rmap_locks = false;
        }
        return new_vma;
 
- out_free_mempol:
+out_free_mempol:
        mpol_put(vma_policy(new_vma));
- out_free_vma:
+out_free_vma:
        kmem_cache_free(vm_area_cachep, new_vma);
+out:
        return NULL;
 }
 
@@ -3013,21 +3049,13 @@ static int special_mapping_fault(struct vm_area_struct *vma,
        pgoff_t pgoff;
        struct page **pages;
 
-       /*
-        * special mappings have no vm_file, and in that case, the mm
-        * uses vm_pgoff internally. So we have to subtract it from here.
-        * We are allowed to do this because we are the mm; do not copy
-        * this code into drivers!
-        */
-       pgoff = vmf->pgoff - vma->vm_pgoff;
-
        if (vma->vm_ops == &legacy_special_mapping_vmops)
                pages = vma->vm_private_data;
        else
                pages = ((struct vm_special_mapping *)vma->vm_private_data)->
                        pages;
 
-       for (; pgoff && *pages; ++pages)
+       for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
                pgoff--;
 
        if (*pages) {
@@ -3043,8 +3071,8 @@ static int special_mapping_fault(struct vm_area_struct *vma,
 static struct vm_area_struct *__install_special_mapping(
        struct mm_struct *mm,
        unsigned long addr, unsigned long len,
-       unsigned long vm_flags, const struct vm_operations_struct *ops,
-       void *priv)
+       unsigned long vm_flags, void *priv,
+       const struct vm_operations_struct *ops)
 {
        int ret;
        struct vm_area_struct *vma;
@@ -3093,8 +3121,8 @@ struct vm_area_struct *_install_special_mapping(
        unsigned long addr, unsigned long len,
        unsigned long vm_flags, const struct vm_special_mapping *spec)
 {
-       return __install_special_mapping(mm, addr, len, vm_flags,
-                                        &special_mapping_vmops, (void *)spec);
+       return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
+                                       &special_mapping_vmops);
 }
 
 int install_special_mapping(struct mm_struct *mm,
@@ -3102,8 +3130,8 @@ int install_special_mapping(struct mm_struct *mm,
                            unsigned long vm_flags, struct page **pages)
 {
        struct vm_area_struct *vma = __install_special_mapping(
-               mm, addr, len, vm_flags, &legacy_special_mapping_vmops,
-               (void *)pages);
+               mm, addr, len, vm_flags, (void *)pages,
+               &legacy_special_mapping_vmops);
 
        return PTR_ERR_OR_ZERO(vma);
 }