#include <linux/string.h>
#include <linux/dma-debug.h>
#include <linux/debugfs.h>
+#include <linux/userfaultfd_k.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
#ifdef HAVE_GENERIC_MMU_GATHER
-static int tlb_next_batch(struct mmu_gather *tlb)
+static bool tlb_next_batch(struct mmu_gather *tlb)
{
struct mmu_gather_batch *batch;
batch = tlb->active;
if (batch->next) {
tlb->active = batch->next;
- return 1;
+ return true;
}
if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
- return 0;
+ return false;
batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
if (!batch)
- return 0;
+ return false;
tlb->batch_count++;
batch->next = NULL;
tlb->active->next = batch;
tlb->active = batch;
- return 1;
+ return true;
}
/* tlb_gather_mmu
goto oom;
cow_user_page(new_page, old_page, address, vma);
}
- __SetPageUptodate(new_page);
if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
goto oom_free_new;
+ __SetPageUptodate(new_page);
+
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
/*
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!pte_none(*page_table))
goto unlock;
+ /* Deliver the page fault to userland, check inside PT lock */
+ if (userfaultfd_missing(vma)) {
+ pte_unmap_unlock(page_table, ptl);
+ return handle_userfault(vma, address, flags,
+ VM_UFFD_MISSING);
+ }
goto setpte;
}
page = alloc_zeroed_user_highpage_movable(vma, address);
if (!page)
goto oom;
+
+ if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
+ goto oom_free_page;
+
/*
* The memory barrier inside __SetPageUptodate makes sure that
* preceeding stores to the page contents become visible before
*/
__SetPageUptodate(page);
- if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
- goto oom_free_page;
-
entry = mk_pte(page, vma->vm_page_prot);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
if (!pte_none(*page_table))
goto release;
+ /* Deliver the page fault to userland, check inside PT lock */
+ if (userfaultfd_missing(vma)) {
+ pte_unmap_unlock(page_table, ptl);
+ mem_cgroup_cancel_charge(page, memcg);
+ page_cache_release(page);
+ return handle_userfault(vma, address, flags,
+ VM_UFFD_MISSING);
+ }
+
inc_mm_counter_fast(mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
mem_cgroup_commit_charge(page, memcg, false);
return 0;
}
+static int create_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmd, unsigned int flags)
+{
+ if (vma_is_anonymous(vma))
+ return do_huge_pmd_anonymous_page(mm, vma, address, pmd, flags);
+ if (vma->vm_ops->pmd_fault)
+ return vma->vm_ops->pmd_fault(vma, address, pmd, flags);
+ return VM_FAULT_FALLBACK;
+}
+
+static int wp_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmd, pmd_t orig_pmd,
+ unsigned int flags)
+{
+ if (vma_is_anonymous(vma))
+ return do_huge_pmd_wp_page(mm, vma, address, pmd, orig_pmd);
+ if (vma->vm_ops->pmd_fault)
+ return vma->vm_ops->pmd_fault(vma, address, pmd, flags);
+ return VM_FAULT_FALLBACK;
+}
+
/*
* These routines also need to handle stuff like marking pages dirty
* and/or accessed for architectures that don't do it in hardware (most
barrier();
if (!pte_present(entry)) {
if (pte_none(entry)) {
- if (vma->vm_ops)
+ if (vma_is_anonymous(vma))
+ return do_anonymous_page(mm, vma, address,
+ pte, pmd, flags);
+ else
return do_fault(mm, vma, address, pte, pmd,
flags, entry);
-
- return do_anonymous_page(mm, vma, address, pte, pmd,
- flags);
}
return do_swap_page(mm, vma, address,
pte, pmd, flags, entry);
if (!pmd)
return VM_FAULT_OOM;
if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
- int ret = VM_FAULT_FALLBACK;
- if (!vma->vm_ops)
- ret = do_huge_pmd_anonymous_page(mm, vma, address,
- pmd, flags);
+ int ret = create_huge_pmd(mm, vma, address, pmd, flags);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
} else {
orig_pmd, pmd);
if (dirty && !pmd_write(orig_pmd)) {
- ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
- orig_pmd);
+ ret = wp_huge_pmd(mm, vma, address, pmd,
+ orig_pmd, flags);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
} else {
if (unlikely(pmd_none(*pmd)) &&
unlikely(__pte_alloc(mm, vma, pmd, address)))
return VM_FAULT_OOM;
- /* if an huge pmd materialized from under us just retry later */
- if (unlikely(pmd_trans_huge(*pmd)))
+ /*
+ * If a huge pmd materialized under us just retry later. Use
+ * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
+ * didn't become pmd_trans_huge under us and then back to pmd_none, as
+ * a result of MADV_DONTNEED running immediately after a huge pmd fault
+ * in a different thread of this mm, in turn leading to a misleading
+ * pmd_trans_huge() retval. All we have to ensure is that it is a
+ * regular pmd that we can walk with pte_offset_map() and we can do that
+ * through an atomic read in C, which is what pmd_trans_unstable()
+ * provides.
+ */
+ if (unlikely(pmd_trans_unstable(pmd)))
return 0;
/*
* A regular pmd is established and it can't morph into a huge pmd
if (buf) {
char *p;
- p = d_path(&f->f_path, buf, PAGE_SIZE);
+ p = file_path(f, buf, PAGE_SIZE);
if (IS_ERR(p))
p = "?";
printk("%s%s[%lx+%lx]", prefix, kbasename(p),