KVM: nVMX: avoid incorrect preemption timer vmexit in nested guest

[kvmfornfv.git] / kernel / mm / mmap.c
diff --git a/kernel/mm/mmap.c b/kernel/mm/mmap.c

index bb50cac..455772a 100644 (file)
--- a/kernel/mm/mmap.c
+++ b/kernel/mm/mmap.c
@@ -41,6 +41,7 @@
  #include <linux/notifier.h>
  #include <linux/memory.h>
  #include <linux/printk.h>
+#include <linux/userfaultfd_k.h>
  
  #include <asm/uaccess.h>
  #include <asm/cacheflush.h>
@@ -440,12 +441,16 @@ static void validate_mm(struct mm_struct *mm)
         struct vm_area_struct *vma = mm->mmap;
  
         while (vma) {
+               struct anon_vma *anon_vma = vma->anon_vma;
                 struct anon_vma_chain *avc;
  
-               vma_lock_anon_vma(vma);
-               list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-                       anon_vma_interval_tree_verify(avc);
-               vma_unlock_anon_vma(vma);
+               if (anon_vma) {
+                       anon_vma_lock_read(anon_vma);
+                       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                               anon_vma_interval_tree_verify(avc);
+                       anon_vma_unlock_read(anon_vma);
+               }
+
                 highest_address = vma->vm_end;
                 vma = vma->vm_next;
                 i++;
@@ -919,7 +924,8 @@ again:                      remove_next = 1 + (end > next->vm_end);
   * per-vma resources, so we don't attempt to merge those.
   */
  static inline int is_mergeable_vma(struct vm_area_struct *vma,
-                       struct file *file, unsigned long vm_flags)
+                               struct file *file, unsigned long vm_flags,
+                               struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
  {
         /*
          * VM_SOFTDIRTY should not prevent from VMA merging, if we
@@ -935,6 +941,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
                 return 0;
         if (vma->vm_ops && vma->vm_ops->close)
                 return 0;
+       if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
+               return 0;
         return 1;
  }
  
@@ -965,9 +973,11 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
   */
  static int
  can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
-       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+                    struct anon_vma *anon_vma, struct file *file,
+                    pgoff_t vm_pgoff,
+                    struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
  {
-       if (is_mergeable_vma(vma, file, vm_flags) &&
+       if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
             is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                 if (vma->vm_pgoff == vm_pgoff)
                         return 1;
@@ -984,9 +994,11 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
   */
  static int
  can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
-       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+                   struct anon_vma *anon_vma, struct file *file,
+                   pgoff_t vm_pgoff,
+                   struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
  {
-       if (is_mergeable_vma(vma, file, vm_flags) &&
+       if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
             is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                 pgoff_t vm_pglen;
                 vm_pglen = vma_pages(vma);
@@ -1029,7 +1041,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                         struct vm_area_struct *prev, unsigned long addr,
                         unsigned long end, unsigned long vm_flags,
                         struct anon_vma *anon_vma, struct file *file,
-                       pgoff_t pgoff, struct mempolicy *policy)
+                       pgoff_t pgoff, struct mempolicy *policy,
+                       struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
  {
         pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
         struct vm_area_struct *area, *next;
@@ -1056,14 +1069,17 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
         if (prev && prev->vm_end == addr &&
                         mpol_equal(vma_policy(prev), policy) &&
                         can_vma_merge_after(prev, vm_flags,
-                                               anon_vma, file, pgoff)) {
+                                           anon_vma, file, pgoff,
+                                           vm_userfaultfd_ctx)) {
                 /*
                  * OK, it can.  Can we now merge in the successor as well?
                  */
                 if (next && end == next->vm_start &&
                                 mpol_equal(policy, vma_policy(next)) &&
                                 can_vma_merge_before(next, vm_flags,
-                                       anon_vma, file, pgoff+pglen) &&
+                                                    anon_vma, file,
+                                                    pgoff+pglen,
+                                                    vm_userfaultfd_ctx) &&
                                 is_mergeable_anon_vma(prev->anon_vma,
                                                       next->anon_vma, NULL)) {
                                                         /* cases 1, 6 */
@@ -1084,7 +1100,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
         if (next && end == next->vm_start &&
                         mpol_equal(policy, vma_policy(next)) &&
                         can_vma_merge_before(next, vm_flags,
-                                       anon_vma, file, pgoff+pglen)) {
+                                            anon_vma, file, pgoff+pglen,
+                                            vm_userfaultfd_ctx)) {
                 if (prev && addr < prev->vm_end)        /* case 4 */
                         err = vma_adjust(prev, prev->vm_start,
                                 addr, prev->vm_pgoff, NULL);
@@ -1247,17 +1264,18 @@ static inline int mlock_future_check(struct mm_struct *mm,
  /*
   * The caller must hold down_write(&current->mm->mmap_sem).
   */
-
-unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+unsigned long do_mmap(struct file *file, unsigned long addr,
                         unsigned long len, unsigned long prot,
-                       unsigned long flags, unsigned long pgoff,
-                       unsigned long *populate)
+                       unsigned long flags, vm_flags_t vm_flags,
+                       unsigned long pgoff, unsigned long *populate)
  {
         struct mm_struct *mm = current->mm;
-       vm_flags_t vm_flags;
  
         *populate = 0;
  
+       if (!len)
+               return -EINVAL;
+
         /*
          * Does the application expect PROT_READ to imply PROT_EXEC?
          *
@@ -1265,12 +1283,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
          *  mounted, in which case we dont add PROT_EXEC.)
          */
         if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
-               if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
+               if (!(file && path_noexec(&file->f_path)))
                         prot |= PROT_EXEC;
  
-       if (!len)
-               return -EINVAL;
-
         if (!(flags & MAP_FIXED))
                 addr = round_hint_to_min(addr);
  
@@ -1291,14 +1306,14 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
          * that it represents a valid section of the address space.
          */
         addr = get_unmapped_area(file, addr, len, pgoff, flags);
-       if (addr & ~PAGE_MASK)
+       if (offset_in_page(addr))
                 return addr;
  
         /* Do simple checking here so the lower-level routines won't have
          * to. we assume access permissions have been handled by the open
          * of the memory object, so we don't do any here.
          */
-       vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
+       vm_flags |= calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
                         mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
  
         if (flags & MAP_LOCKED)
@@ -1337,7 +1352,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                 case MAP_PRIVATE:
                         if (!(file->f_mode & FMODE_READ))
                                 return -EACCES;
-                       if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
+                       if (path_noexec(&file->f_path)) {
                                 if (vm_flags & VM_EXEC)
                                         return -EPERM;
                                 vm_flags &= ~VM_MAYEXEC;
@@ -1401,13 +1416,13 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
                 unsigned long, fd, unsigned long, pgoff)
  {
         struct file *file = NULL;
-       unsigned long retval = -EBADF;
+       unsigned long retval;
  
         if (!(flags & MAP_ANONYMOUS)) {
                 audit_mmap_fd(fd, flags);
                 file = fget(fd);
                 if (!file)
-                       goto out;
+                       return -EBADF;
                 if (is_file_hugepages(file))
                         len = ALIGN(len, huge_page_size(hstate_file(file)));
                 retval = -EINVAL;
@@ -1442,7 +1457,6 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
  out_fput:
         if (file)
                 fput(file);
-out:
         return retval;
  }
  
@@ -1462,7 +1476,7 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
  
         if (copy_from_user(&a, arg, sizeof(a)))
                 return -EFAULT;
-       if (a.offset & ~PAGE_MASK)
+       if (offset_in_page(a.offset))
                 return -EINVAL;
  
         return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
@@ -1479,13 +1493,14 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
  int vma_wants_writenotify(struct vm_area_struct *vma)
  {
         vm_flags_t vm_flags = vma->vm_flags;
+       const struct vm_operations_struct *vm_ops = vma->vm_ops;
  
         /* If it was private or non-writable, the write bit is already clear */
         if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
                 return 0;
  
         /* The backer wishes to know when pages are first written to? */
-       if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+       if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
                 return 1;
  
         /* The open routine did something to the protections that pgprot_modify
@@ -1550,7 +1565,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
         }
  
         /* Clear old maps */
-       error = -ENOMEM;
         while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
                               &rb_parent)) {
                 if (do_munmap(mm, addr, len))
@@ -1570,8 +1584,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
         /*
          * Can we just expand an old mapping?
          */
-       vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff,
-                       NULL);
+       vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
+                       NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
         if (vma)
                 goto out;
  
@@ -1651,7 +1665,7 @@ out:
                                         vma == get_gate_vma(current->mm)))
                         mm->locked_vm += (len >> PAGE_SHIFT);
                 else
-                       vma->vm_flags &= ~VM_LOCKED;
+                       vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
         }
  
         if (file)
@@ -1977,7 +1991,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
          * can happen with large stack limits and large mmap()
          * allocations.
          */
-       if (addr & ~PAGE_MASK) {
+       if (offset_in_page(addr)) {
                 VM_BUG_ON(addr != -ENOMEM);
                 info.flags = 0;
                 info.low_limit = TASK_UNMAPPED_BASE;
@@ -2013,7 +2027,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
  
         if (addr > TASK_SIZE - len)
                 return -ENOMEM;
-       if (addr & ~PAGE_MASK)
+       if (offset_in_page(addr))
                 return -EINVAL;
  
         addr = arch_rebalance_pgtables(addr, len);
@@ -2035,7 +2049,6 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
                 return vma;
  
         rb_node = mm->mm_rb.rb_node;
-       vma = NULL;
  
         while (rb_node) {
                 struct vm_area_struct *tmp;
@@ -2127,10 +2140,6 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
         if (security_vm_enough_memory_mm(mm, grow))
                 return -ENOMEM;
  
-       /* Ok, everything looks good - let it rip */
-       if (vma->vm_flags & VM_LOCKED)
-               mm->locked_vm += grow;
-       vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
         return 0;
  }
  
@@ -2141,32 +2150,28 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
   */
  int expand_upwards(struct vm_area_struct *vma, unsigned long address)
  {
-       int error;
+       struct mm_struct *mm = vma->vm_mm;
+       int error = 0;
  
         if (!(vma->vm_flags & VM_GROWSUP))
                 return -EFAULT;
  
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
+       /* Guard against wrapping around to address 0. */
+       if (address < PAGE_ALIGN(address+4))
+               address = PAGE_ALIGN(address+4);
+       else
+               return -ENOMEM;
+
+       /* We must make sure the anon_vma is allocated. */
         if (unlikely(anon_vma_prepare(vma)))
                 return -ENOMEM;
-       vma_lock_anon_vma(vma);
  
         /*
          * vma->vm_start/vm_end cannot change under us because the caller
          * is required to hold the mmap_sem in read mode.  We need the
          * anon_vma lock to serialize against concurrent expand_stacks.
-        * Also guard against wrapping around to address 0.
          */
-       if (address < PAGE_ALIGN(address+4))
-               address = PAGE_ALIGN(address+4);
-       else {
-               vma_unlock_anon_vma(vma);
-               return -ENOMEM;
-       }
-       error = 0;
+       anon_vma_lock_write(vma->anon_vma);
  
         /* Somebody else might have raced and expanded it already */
         if (address > vma->vm_end) {
@@ -2184,29 +2189,33 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                  * updates, but we only hold a shared mmap_sem
                                  * lock here, so we need to protect against
                                  * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                  * we don't guarantee that all growable vmas
                                  * in a mm share the same root anon vma.
                                  * So, we reuse mm->page_table_lock to guard
                                  * against concurrent vma expansions.
                                  */
-                               spin_lock(&vma->vm_mm->page_table_lock);
+                               spin_lock(&mm->page_table_lock);
+                               if (vma->vm_flags & VM_LOCKED)
+                                       mm->locked_vm += grow;
+                               vm_stat_account(mm, vma->vm_flags,
+                                               vma->vm_file, grow);
                                 anon_vma_interval_tree_pre_update_vma(vma);
                                 vma->vm_end = address;
                                 anon_vma_interval_tree_post_update_vma(vma);
                                 if (vma->vm_next)
                                         vma_gap_update(vma->vm_next);
                                 else
-                                       vma->vm_mm->highest_vm_end = address;
-                               spin_unlock(&vma->vm_mm->page_table_lock);
+                                       mm->highest_vm_end = address;
+                               spin_unlock(&mm->page_table_lock);
  
                                 perf_event_mmap(vma);
                         }
                 }
         }
-       vma_unlock_anon_vma(vma);
+       anon_vma_unlock_write(vma->anon_vma);
         khugepaged_enter_vma_merge(vma, vma->vm_flags);
-       validate_mm(vma->vm_mm);
+       validate_mm(mm);
         return error;
  }
  #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -2217,27 +2226,24 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
  int expand_downwards(struct vm_area_struct *vma,
                                    unsigned long address)
  {
+       struct mm_struct *mm = vma->vm_mm;
         int error;
  
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
-       if (unlikely(anon_vma_prepare(vma)))
-               return -ENOMEM;
-
         address &= PAGE_MASK;
         error = security_mmap_addr(address);
         if (error)
                 return error;
  
-       vma_lock_anon_vma(vma);
+       /* We must make sure the anon_vma is allocated. */
+       if (unlikely(anon_vma_prepare(vma)))
+               return -ENOMEM;
  
         /*
          * vma->vm_start/vm_end cannot change under us because the caller
          * is required to hold the mmap_sem in read mode.  We need the
          * anon_vma lock to serialize against concurrent expand_stacks.
          */
+       anon_vma_lock_write(vma->anon_vma);
  
         /* Somebody else might have raced and expanded it already */
         if (address < vma->vm_start) {
@@ -2255,27 +2261,31 @@ int expand_downwards(struct vm_area_struct *vma,
                                  * updates, but we only hold a shared mmap_sem
                                  * lock here, so we need to protect against
                                  * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                  * we don't guarantee that all growable vmas
                                  * in a mm share the same root anon vma.
                                  * So, we reuse mm->page_table_lock to guard
                                  * against concurrent vma expansions.
                                  */
-                               spin_lock(&vma->vm_mm->page_table_lock);
+                               spin_lock(&mm->page_table_lock);
+                               if (vma->vm_flags & VM_LOCKED)
+                                       mm->locked_vm += grow;
+                               vm_stat_account(mm, vma->vm_flags,
+                                               vma->vm_file, grow);
                                 anon_vma_interval_tree_pre_update_vma(vma);
                                 vma->vm_start = address;
                                 vma->vm_pgoff -= grow;
                                 anon_vma_interval_tree_post_update_vma(vma);
                                 vma_gap_update(vma);
-                               spin_unlock(&vma->vm_mm->page_table_lock);
+                               spin_unlock(&mm->page_table_lock);
  
                                 perf_event_mmap(vma);
                         }
                 }
         }
-       vma_unlock_anon_vma(vma);
+       anon_vma_unlock_write(vma->anon_vma);
         khugepaged_enter_vma_merge(vma, vma->vm_flags);
-       validate_mm(vma->vm_mm);
+       validate_mm(mm);
         return error;
  }
  
@@ -2442,7 +2452,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
               unsigned long addr, int new_below)
  {
         struct vm_area_struct *new;
-       int err = -ENOMEM;
+       int err;
  
         if (is_vm_hugetlb_page(vma) && (addr &
                                         ~(huge_page_mask(hstate_vma(vma)))))
@@ -2450,7 +2460,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
  
         new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
         if (!new)
-               goto out_err;
+               return -ENOMEM;
  
         /* most fields are the same, copy all, and then fixup */
         *new = *vma;
@@ -2498,7 +2508,6 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
         mpol_put(vma_policy(new));
   out_free_vma:
         kmem_cache_free(vm_area_cachep, new);
- out_err:
         return err;
  }
  
@@ -2525,7 +2534,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
         unsigned long end;
         struct vm_area_struct *vma, *prev, *last;
  
-       if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
+       if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
                 return -EINVAL;
  
         len = PAGE_ALIGN(len);
@@ -2659,12 +2668,29 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
         if (!vma || !(vma->vm_flags & VM_SHARED))
                 goto out;
  
-       if (start < vma->vm_start || start + size > vma->vm_end)
+       if (start < vma->vm_start)
                 goto out;
  
-       if (pgoff == linear_page_index(vma, start)) {
-               ret = 0;
-               goto out;
+       if (start + size > vma->vm_end) {
+               struct vm_area_struct *next;
+
+               for (next = vma->vm_next; next; next = next->vm_next) {
+                       /* hole between vmas ? */
+                       if (next->vm_start != next->vm_prev->vm_end)
+                               goto out;
+
+                       if (next->vm_file != vma->vm_file)
+                               goto out;
+
+                       if (next->vm_flags != vma->vm_flags)
+                               goto out;
+
+                       if (start + size <= next->vm_end)
+                               break;
+               }
+
+               if (!next)
+                       goto out;
         }
  
         prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
@@ -2674,9 +2700,16 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
         flags &= MAP_NONBLOCK;
         flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
         if (vma->vm_flags & VM_LOCKED) {
+               struct vm_area_struct *tmp;
                 flags |= MAP_LOCKED;
+
                 /* drop PG_Mlocked flag for over-mapped range */
-               munlock_vma_pages_range(vma, start, start + size);
+               for (tmp = vma; tmp->vm_start >= start + size;
+                               tmp = tmp->vm_next) {
+                       munlock_vma_pages_range(tmp,
+                                       max(tmp->vm_start, start),
+                                       min(tmp->vm_end, start + size));
+               }
         }
  
         file = get_file(vma->vm_file);
@@ -2723,7 +2756,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
         flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
  
         error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
-       if (error & ~PAGE_MASK)
+       if (offset_in_page(error))
                 return error;
  
         error = mlock_future_check(mm, mm->def_flags, len);
@@ -2757,7 +2790,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
  
         /* Can we just expand an old private anonymous mapping? */
         vma = vma_merge(mm, prev, addr, addr + len, flags,
-                                       NULL, NULL, pgoff, NULL);
+                       NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
         if (vma)
                 goto out;
  
@@ -2859,6 +2892,13 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
         struct vm_area_struct *prev;
         struct rb_node **rb_link, *rb_parent;
  
+       if (find_vma_links(mm, vma->vm_start, vma->vm_end,
+                          &prev, &rb_link, &rb_parent))
+               return -ENOMEM;
+       if ((vma->vm_flags & VM_ACCOUNT) &&
+            security_vm_enough_memory_mm(mm, vma_pages(vma)))
+               return -ENOMEM;
+
         /*
          * The vm_pgoff of a purely anonymous vma should be irrelevant
          * until its first write fault, when page's anon_vma and index
@@ -2871,16 +2911,10 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
          * using the existing file pgoff checks and manipulations.
          * Similarly in do_mmap_pgoff and in do_brk.
          */
-       if (!vma->vm_file) {
+       if (vma_is_anonymous(vma)) {
                 BUG_ON(vma->anon_vma);
                 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
         }
-       if (find_vma_links(mm, vma->vm_start, vma->vm_end,
-                          &prev, &rb_link, &rb_parent))
-               return -ENOMEM;
-       if ((vma->vm_flags & VM_ACCOUNT) &&
-            security_vm_enough_memory_mm(mm, vma_pages(vma)))
-               return -ENOMEM;
  
         vma_link(mm, vma, prev, rb_link, rb_parent);
         return 0;
@@ -2905,7 +2939,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
          * If anonymous vma has not yet been faulted, update new pgoff
          * to match new location, to increase its chance of merging.
          */
-       if (unlikely(!vma->vm_file && !vma->anon_vma)) {
+       if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
                 pgoff = addr >> PAGE_SHIFT;
                 faulted_in_anon_vma = false;
         }
@@ -2913,7 +2947,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
         if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
                 return NULL;    /* should never get here */
         new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
-                       vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+                           vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+                           vma->vm_userfaultfd_ctx);
         if (new_vma) {
                 /*
                  * Source vma may have been merged into new_vma
@@ -2938,30 +2973,31 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
         } else {
                 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-               if (new_vma) {
-                       *new_vma = *vma;
-                       new_vma->vm_start = addr;
-                       new_vma->vm_end = addr + len;
-                       new_vma->vm_pgoff = pgoff;
-                       if (vma_dup_policy(vma, new_vma))
-                               goto out_free_vma;
-                       INIT_LIST_HEAD(&new_vma->anon_vma_chain);
-                       if (anon_vma_clone(new_vma, vma))
-                               goto out_free_mempol;
-                       if (new_vma->vm_file)
-                               get_file(new_vma->vm_file);
-                       if (new_vma->vm_ops && new_vma->vm_ops->open)
-                               new_vma->vm_ops->open(new_vma);
-                       vma_link(mm, new_vma, prev, rb_link, rb_parent);
-                       *need_rmap_locks = false;
-               }
+               if (!new_vma)
+                       goto out;
+               *new_vma = *vma;
+               new_vma->vm_start = addr;
+               new_vma->vm_end = addr + len;
+               new_vma->vm_pgoff = pgoff;
+               if (vma_dup_policy(vma, new_vma))
+                       goto out_free_vma;
+               INIT_LIST_HEAD(&new_vma->anon_vma_chain);
+               if (anon_vma_clone(new_vma, vma))
+                       goto out_free_mempol;
+               if (new_vma->vm_file)
+                       get_file(new_vma->vm_file);
+               if (new_vma->vm_ops && new_vma->vm_ops->open)
+                       new_vma->vm_ops->open(new_vma);
+               vma_link(mm, new_vma, prev, rb_link, rb_parent);
+               *need_rmap_locks = false;
         }
         return new_vma;
  
- out_free_mempol:
+out_free_mempol:
         mpol_put(vma_policy(new_vma));
- out_free_vma:
+out_free_vma:
         kmem_cache_free(vm_area_cachep, new_vma);
+out:
         return NULL;
  }
  
@@ -3013,21 +3049,13 @@ static int special_mapping_fault(struct vm_area_struct *vma,
         pgoff_t pgoff;
         struct page **pages;
  
-       /*
-        * special mappings have no vm_file, and in that case, the mm
-        * uses vm_pgoff internally. So we have to subtract it from here.
-        * We are allowed to do this because we are the mm; do not copy
-        * this code into drivers!
-        */
-       pgoff = vmf->pgoff - vma->vm_pgoff;
-
         if (vma->vm_ops == &legacy_special_mapping_vmops)
                 pages = vma->vm_private_data;
         else
                 pages = ((struct vm_special_mapping *)vma->vm_private_data)->
                         pages;
  
-       for (; pgoff && *pages; ++pages)
+       for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
                 pgoff--;
  
         if (*pages) {
@@ -3043,8 +3071,8 @@ static int special_mapping_fault(struct vm_area_struct *vma,
  static struct vm_area_struct *__install_special_mapping(
         struct mm_struct *mm,
         unsigned long addr, unsigned long len,
-       unsigned long vm_flags, const struct vm_operations_struct *ops,
-       void *priv)
+       unsigned long vm_flags, void *priv,
+       const struct vm_operations_struct *ops)
  {
         int ret;
         struct vm_area_struct *vma;
@@ -3093,8 +3121,8 @@ struct vm_area_struct *_install_special_mapping(
         unsigned long addr, unsigned long len,
         unsigned long vm_flags, const struct vm_special_mapping *spec)
  {
-       return __install_special_mapping(mm, addr, len, vm_flags,
-                                        &special_mapping_vmops, (void *)spec);
+       return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
+                                       &special_mapping_vmops);
  }
  
  int install_special_mapping(struct mm_struct *mm,
@@ -3102,8 +3130,8 @@ int install_special_mapping(struct mm_struct *mm,
                             unsigned long vm_flags, struct page **pages)
  {
         struct vm_area_struct *vma = __install_special_mapping(
-               mm, addr, len, vm_flags, &legacy_special_mapping_vmops,
-               (void *)pages);
+               mm, addr, len, vm_flags, (void *)pages,
+               &legacy_special_mapping_vmops);
  
         return PTR_ERR_OR_ZERO(vma);
  }