These changes are the raw update of the kvmfornfv kernel sources to linux-4.4.6-rt14.
diff --git a/kernel/fs/proc/task_mmu.c b/kernel/fs/proc/task_mmu.c
index 6dee68d..09cd3ed 100644
@@ -13,6 +13,7 @@
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
 
 #include <asm/elf.h>
 #include <asm/uaccess.h>
@@ -69,6 +70,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
                ptes >> 10,
                pmds >> 10,
                swap << (PAGE_SHIFT-10));
+       hugetlb_report_usage(m, mm);
 }
 
 unsigned long task_vsize(struct mm_struct *mm)
@@ -310,7 +312,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
         */
        if (file) {
                seq_pad(m, ' ');
-               seq_path(m, &file->f_path, "\n");
+               seq_file_path(m, file, "\n");
                goto done;
        }
 
@@ -445,7 +447,10 @@ struct mem_size_stats {
        unsigned long anonymous;
        unsigned long anonymous_thp;
        unsigned long swap;
+       unsigned long shared_hugetlb;
+       unsigned long private_hugetlb;
        u64 pss;
+       u64 swap_pss;
 };
 
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
@@ -458,7 +463,7 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
 
        mss->resident += size;
        /* Accumulate the size in pages that have been accessed. */
-       if (young || PageReferenced(page))
+       if (young || page_is_young(page) || PageReferenced(page))
                mss->referenced += size;
        mapcount = page_mapcount(page);
        if (mapcount >= 2) {
@@ -492,9 +497,20 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
        } else if (is_swap_pte(*pte)) {
                swp_entry_t swpent = pte_to_swp_entry(*pte);
 
-               if (!non_swap_entry(swpent))
+               if (!non_swap_entry(swpent)) {
+                       int mapcount;
+
                        mss->swap += PAGE_SIZE;
-               else if (is_migration_entry(swpent))
+                       mapcount = swp_swapcount(swpent);
+                       if (mapcount >= 2) {
+                               u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;
+
+                               do_div(pss_delta, mapcount);
+                               mss->swap_pss += pss_delta;
+                       } else {
+                               mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
+                       }
+               } else if (is_migration_entry(swpent))
                        page = migration_entry_to_page(swpent);
        }
 
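The new SwapPss accounting above mirrors Pss for swapped-out pages: each swap entry's size is divided by its swap map count. As a worked example, a 4 KiB page that has been swapped out while still referenced from three address spaces (swp_swapcount() == 3) adds 4096/3 ≈ 1365 bytes to each process's swap_pss, while a privately swapped page adds the full 4096 bytes, exactly as the mapcount >= 2 branch and the do_div() over pss_delta implement.
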
@@ -597,6 +613,8 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
                [ilog2(VM_HUGEPAGE)]    = "hg",
                [ilog2(VM_NOHUGEPAGE)]  = "nh",
                [ilog2(VM_MERGEABLE)]   = "mg",
+               [ilog2(VM_UFFD_MISSING)]= "um",
+               [ilog2(VM_UFFD_WP)]     = "uw",
        };
        size_t i;
 
@@ -610,12 +628,44 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
        seq_putc(m, '\n');
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
+                                unsigned long addr, unsigned long end,
+                                struct mm_walk *walk)
+{
+       struct mem_size_stats *mss = walk->private;
+       struct vm_area_struct *vma = walk->vma;
+       struct page *page = NULL;
+
+       if (pte_present(*pte)) {
+               page = vm_normal_page(vma, addr, *pte);
+       } else if (is_swap_pte(*pte)) {
+               swp_entry_t swpent = pte_to_swp_entry(*pte);
+
+               if (is_migration_entry(swpent))
+                       page = migration_entry_to_page(swpent);
+       }
+       if (page) {
+               int mapcount = page_mapcount(page);
+
+               if (mapcount >= 2)
+                       mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
+               else
+                       mss->private_hugetlb += huge_page_size(hstate_vma(vma));
+       }
+       return 0;
+}
+#endif /* HUGETLB_PAGE */
+
 static int show_smap(struct seq_file *m, void *v, int is_pid)
 {
        struct vm_area_struct *vma = v;
        struct mem_size_stats mss;
        struct mm_walk smaps_walk = {
                .pmd_entry = smaps_pte_range,
+#ifdef CONFIG_HUGETLB_PAGE
+               .hugetlb_entry = smaps_hugetlb_range,
+#endif
                .mm = vma->vm_mm,
                .private = &mss,
        };
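smaps_hugetlb_range() classifies each mapped hugetlb page by its map count: with page_mapcount() >= 2 the full huge page size is charged to shared_hugetlb, otherwise to private_hugetlb (reported below as Shared_Hugetlb and Private_Hugetlb). A 2 MiB hugetlb page mapped by two processes therefore shows up as 2048 kB of Shared_Hugetlb in both of their smaps; unlike Pss, there is no proportional split for hugetlb pages.
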
@@ -637,7 +687,10 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
                   "Referenced:     %8lu kB\n"
                   "Anonymous:      %8lu kB\n"
                   "AnonHugePages:  %8lu kB\n"
+                  "Shared_Hugetlb: %8lu kB\n"
+                  "Private_Hugetlb: %7lu kB\n"
                   "Swap:           %8lu kB\n"
+                  "SwapPss:        %8lu kB\n"
                   "KernelPageSize: %8lu kB\n"
                   "MMUPageSize:    %8lu kB\n"
                   "Locked:         %8lu kB\n",
@@ -651,7 +704,10 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
                   mss.referenced >> 10,
                   mss.anonymous >> 10,
                   mss.anonymous_thp >> 10,
+                  mss.shared_hugetlb >> 10,
+                  mss.private_hugetlb >> 10,
                   mss.swap >> 10,
+                  (unsigned long)(mss.swap_pss >> (10 + PSS_SHIFT)),
                   vma_kernel_pagesize(vma) >> 10,
                   vma_mmu_pagesize(vma) >> 10,
                   (vma->vm_flags & VM_LOCKED) ?
@@ -710,23 +766,6 @@ const struct file_operations proc_tid_smaps_operations = {
        .release        = proc_map_release,
 };
 
-/*
- * We do not want to have constant page-shift bits sitting in
- * pagemap entries and are about to reuse them some time soon.
- *
- * Here's the "migration strategy":
- * 1. when the system boots these bits remain what they are,
- *    but a warning about future change is printed in log;
- * 2. once anyone clears soft-dirty bits via clear_refs file,
- *    these flag is set to denote, that user is aware of the
- *    new API and those page-shift bits change their meaning.
- *    The respective warning is printed in dmesg;
- * 3. In a couple of releases we will remove all the mentions
- *    of page-shift in pagemap entries.
- */
-
-static bool soft_dirty_cleared __read_mostly;
-
 enum clear_refs_types {
        CLEAR_REFS_ALL = 1,
        CLEAR_REFS_ANON,
@@ -753,36 +792,37 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
        pte_t ptent = *pte;
 
        if (pte_present(ptent)) {
+               ptent = ptep_modify_prot_start(vma->vm_mm, addr, pte);
                ptent = pte_wrprotect(ptent);
-               ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+               ptent = pte_clear_soft_dirty(ptent);
+               ptep_modify_prot_commit(vma->vm_mm, addr, pte, ptent);
        } else if (is_swap_pte(ptent)) {
                ptent = pte_swp_clear_soft_dirty(ptent);
+               set_pte_at(vma->vm_mm, addr, pte, ptent);
        }
-
-       set_pte_at(vma->vm_mm, addr, pte, ptent);
 }
+#else
+static inline void clear_soft_dirty(struct vm_area_struct *vma,
+               unsigned long addr, pte_t *pte)
+{
+}
+#endif
 
+#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
 static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
                unsigned long addr, pmd_t *pmdp)
 {
-       pmd_t pmd = *pmdp;
+       pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
 
        pmd = pmd_wrprotect(pmd);
-       pmd = pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
+       pmd = pmd_clear_soft_dirty(pmd);
 
        if (vma->vm_flags & VM_SOFTDIRTY)
                vma->vm_flags &= ~VM_SOFTDIRTY;
 
        set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
 }
-
 #else
-
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
-               unsigned long addr, pte_t *pte)
-{
-}
-
 static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
                unsigned long addr, pmd_t *pmdp)
 {
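Note that clearing soft-dirty on a present PTE now goes through the ptep_modify_prot_start()/ptep_modify_prot_commit() pair instead of a bare set_pte_at(): the entry is atomically fetched and cleared for the duration of the update, so accessed/dirty bits set by hardware in the meantime cannot be silently lost, and paravirtualized implementations may batch the transaction. The swap-PTE branch keeps a plain set_pte_at(), since swap entries are software-only values that the hardware walker never updates.
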
@@ -808,6 +848,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 
                /* Clear accessed and referenced bits. */
                pmdp_test_and_clear_young(vma, addr, pmd);
+               test_and_clear_page_young(page);
                ClearPageReferenced(page);
 out:
                spin_unlock(ptl);
@@ -835,6 +876,7 @@ out:
 
                /* Clear accessed and referenced bits. */
                ptep_test_and_clear_young(vma, addr, pte);
+               test_and_clear_page_young(page);
                ClearPageReferenced(page);
        }
        pte_unmap_unlock(pte - 1, ptl);
@@ -887,13 +929,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
        if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
                return -EINVAL;
 
-       if (type == CLEAR_REFS_SOFT_DIRTY) {
-               soft_dirty_cleared = true;
-               pr_warn_once("The pagemap bits 55-60 has changed their meaning!"
-                            " See the linux/Documentation/vm/pagemap.txt for "
-                            "details.\n");
-       }
-
        task = get_proc_task(file_inode(file));
        if (!task)
                return -ESRCH;
@@ -961,36 +996,26 @@ typedef struct {
 struct pagemapread {
        int pos, len;           /* units: PM_ENTRY_BYTES, not bytes */
        pagemap_entry_t *buffer;
-       bool v2;
+       bool show_pfn;
 };
 
 #define PAGEMAP_WALK_SIZE      (PMD_SIZE)
 #define PAGEMAP_WALK_MASK      (PMD_MASK)
 
-#define PM_ENTRY_BYTES      sizeof(pagemap_entry_t)
-#define PM_STATUS_BITS      3
-#define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
-#define PM_STATUS_MASK      (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
-#define PM_STATUS(nr)       (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
-#define PM_PSHIFT_BITS      6
-#define PM_PSHIFT_OFFSET    (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
-#define PM_PSHIFT_MASK      (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
-#define __PM_PSHIFT(x)      (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
-#define PM_PFRAME_MASK      ((1LL << PM_PSHIFT_OFFSET) - 1)
-#define PM_PFRAME(x)        ((x) & PM_PFRAME_MASK)
-/* in "new" pagemap pshift bits are occupied with more status bits */
-#define PM_STATUS2(v2, x)   (__PM_PSHIFT(v2 ? x : PAGE_SHIFT))
-
-#define __PM_SOFT_DIRTY      (1LL)
-#define PM_PRESENT          PM_STATUS(4LL)
-#define PM_SWAP             PM_STATUS(2LL)
-#define PM_FILE             PM_STATUS(1LL)
-#define PM_NOT_PRESENT(v2)  PM_STATUS2(v2, 0)
+#define PM_ENTRY_BYTES         sizeof(pagemap_entry_t)
+#define PM_PFRAME_BITS         55
+#define PM_PFRAME_MASK         GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
+#define PM_SOFT_DIRTY          BIT_ULL(55)
+#define PM_MMAP_EXCLUSIVE      BIT_ULL(56)
+#define PM_FILE                        BIT_ULL(61)
+#define PM_SWAP                        BIT_ULL(62)
+#define PM_PRESENT             BIT_ULL(63)
+
 #define PM_END_OF_BUFFER    1
 
-static inline pagemap_entry_t make_pme(u64 val)
+static inline pagemap_entry_t make_pme(u64 frame, u64 flags)
 {
-       return (pagemap_entry_t) { .pme = val };
+       return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags };
 }
 
 static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
@@ -1011,7 +1036,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
 
        while (addr < end) {
                struct vm_area_struct *vma = find_vma(walk->mm, addr);
-               pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
+               pagemap_entry_t pme = make_pme(0, 0);
                /* End of address space hole, which we mark as non-present. */
                unsigned long hole_end;
 
@@ -1031,7 +1056,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
 
                /* Addresses in the VMA. */
                if (vma->vm_flags & VM_SOFTDIRTY)
-                       pme.pme |= PM_STATUS2(pm->v2, __PM_SOFT_DIRTY);
+                       pme = make_pme(0, PM_SOFT_DIRTY);
                for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
                        err = add_to_pagemap(addr, &pme, pm);
                        if (err)
@@ -1042,67 +1067,42 @@ out:
        return err;
 }
 
-static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
+static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
                struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 {
-       u64 frame, flags;
+       u64 frame = 0, flags = 0;
        struct page *page = NULL;
-       int flags2 = 0;
 
        if (pte_present(pte)) {
-               frame = pte_pfn(pte);
-               flags = PM_PRESENT;
+               if (pm->show_pfn)
+                       frame = pte_pfn(pte);
+               flags |= PM_PRESENT;
                page = vm_normal_page(vma, addr, pte);
                if (pte_soft_dirty(pte))
-                       flags2 |= __PM_SOFT_DIRTY;
+                       flags |= PM_SOFT_DIRTY;
        } else if (is_swap_pte(pte)) {
                swp_entry_t entry;
                if (pte_swp_soft_dirty(pte))
-                       flags2 |= __PM_SOFT_DIRTY;
+                       flags |= PM_SOFT_DIRTY;
                entry = pte_to_swp_entry(pte);
                frame = swp_type(entry) |
                        (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
-               flags = PM_SWAP;
+               flags |= PM_SWAP;
                if (is_migration_entry(entry))
                        page = migration_entry_to_page(entry);
-       } else {
-               if (vma->vm_flags & VM_SOFTDIRTY)
-                       flags2 |= __PM_SOFT_DIRTY;
-               *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
-               return;
        }
 
        if (page && !PageAnon(page))
                flags |= PM_FILE;
-       if ((vma->vm_flags & VM_SOFTDIRTY))
-               flags2 |= __PM_SOFT_DIRTY;
+       if (page && page_mapcount(page) == 1)
+               flags |= PM_MMAP_EXCLUSIVE;
+       if (vma->vm_flags & VM_SOFTDIRTY)
+               flags |= PM_SOFT_DIRTY;
 
-       *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
+       return make_pme(frame, flags);
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
-               pmd_t pmd, int offset, int pmd_flags2)
-{
-       /*
-        * Currently pmd for thp is always present because thp can not be
-        * swapped-out, migrated, or HWPOISONed (split in such cases instead.)
-        * This if-check is just to prepare for future implementation.
-        */
-       if (pmd_present(pmd))
-               *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
-                               | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT);
-       else
-               *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, pmd_flags2));
-}
-#else
-static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
-               pmd_t pmd, int offset, int pmd_flags2)
-{
-}
-#endif
-
-static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
                             struct mm_walk *walk)
 {
        struct vm_area_struct *vma = walk->vma;
@@ -1111,41 +1111,58 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        pte_t *pte, *orig_pte;
        int err = 0;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
-               int pmd_flags2;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       if (pmd_trans_huge_lock(pmdp, vma, &ptl) == 1) {
+               u64 flags = 0, frame = 0;
+               pmd_t pmd = *pmdp;
 
-               if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
-                       pmd_flags2 = __PM_SOFT_DIRTY;
-               else
-                       pmd_flags2 = 0;
+               if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
+                       flags |= PM_SOFT_DIRTY;
+
+               /*
+                * Currently pmd for thp is always present because thp
+                * can not be swapped-out, migrated, or HWPOISONed
+                * (split in such cases instead.)
+                * This if-check is just to prepare for future implementation.
+                */
+               if (pmd_present(pmd)) {
+                       struct page *page = pmd_page(pmd);
+
+                       if (page_mapcount(page) == 1)
+                               flags |= PM_MMAP_EXCLUSIVE;
+
+                       flags |= PM_PRESENT;
+                       if (pm->show_pfn)
+                               frame = pmd_pfn(pmd) +
+                                       ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+               }
 
                for (; addr != end; addr += PAGE_SIZE) {
-                       unsigned long offset;
-                       pagemap_entry_t pme;
+                       pagemap_entry_t pme = make_pme(frame, flags);
 
-                       offset = (addr & ~PAGEMAP_WALK_MASK) >>
-                                       PAGE_SHIFT;
-                       thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2);
                        err = add_to_pagemap(addr, &pme, pm);
                        if (err)
                                break;
+                       if (pm->show_pfn && (flags & PM_PRESENT))
+                               frame++;
                }
                spin_unlock(ptl);
                return err;
        }
 
-       if (pmd_trans_unstable(pmd))
+       if (pmd_trans_unstable(pmdp))
                return 0;
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
        /*
         * We can assume that @vma always points to a valid one and @end never
         * goes beyond vma->vm_end.
         */
-       orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+       orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
        for (; addr < end; pte++, addr += PAGE_SIZE) {
                pagemap_entry_t pme;
 
-               pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
+               pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
                err = add_to_pagemap(addr, &pme, pm);
                if (err)
                        break;
@@ -1158,40 +1175,44 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
-                                       pte_t pte, int offset, int flags2)
-{
-       if (pte_present(pte))
-               *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)        |
-                               PM_STATUS2(pm->v2, flags2)              |
-                               PM_PRESENT);
-       else
-               *pme = make_pme(PM_NOT_PRESENT(pm->v2)                  |
-                               PM_STATUS2(pm->v2, flags2));
-}
-
 /* This function walks within one hugetlb entry in the single call */
-static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
+static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
                                 unsigned long addr, unsigned long end,
                                 struct mm_walk *walk)
 {
        struct pagemapread *pm = walk->private;
        struct vm_area_struct *vma = walk->vma;
+       u64 flags = 0, frame = 0;
        int err = 0;
-       int flags2;
-       pagemap_entry_t pme;
+       pte_t pte;
 
        if (vma->vm_flags & VM_SOFTDIRTY)
-               flags2 = __PM_SOFT_DIRTY;
-       else
-               flags2 = 0;
+               flags |= PM_SOFT_DIRTY;
+
+       pte = huge_ptep_get(ptep);
+       if (pte_present(pte)) {
+               struct page *page = pte_page(pte);
+
+               if (!PageAnon(page))
+                       flags |= PM_FILE;
+
+               if (page_mapcount(page) == 1)
+                       flags |= PM_MMAP_EXCLUSIVE;
+
+               flags |= PM_PRESENT;
+               if (pm->show_pfn)
+                       frame = pte_pfn(pte) +
+                               ((addr & ~hmask) >> PAGE_SHIFT);
+       }
 
        for (; addr != end; addr += PAGE_SIZE) {
-               int offset = (addr & ~hmask) >> PAGE_SHIFT;
-               huge_pte_to_pagemap_entry(&pme, pm, *pte, offset, flags2);
+               pagemap_entry_t pme = make_pme(frame, flags);
+
                err = add_to_pagemap(addr, &pme, pm);
                if (err)
                        return err;
+               if (pm->show_pfn && (flags & PM_PRESENT))
+                       frame++;
        }
 
        cond_resched();
@@ -1209,7 +1230,9 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
  * Bits 0-54  page frame number (PFN) if present
  * Bits 0-4   swap type if swapped
  * Bits 5-54  swap offset if swapped
- * Bits 55-60 page shift (page size = 1<<page shift)
+ * Bit  55    pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
+ * Bit  56    page exclusively mapped
+ * Bits 57-60 zero
  * Bit  61    page is file-page or shared-anon
  * Bit  62    page swapped
  * Bit  63    page present
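As a quick illustration of the bit layout documented above, here is a minimal, hypothetical userspace sketch (not part of this patch) that decodes one pagemap entry; it assumes 4 KiB pages, and its magic numbers mirror the PM_* definitions introduced earlier in this diff:

/* decode_pagemap.c -- illustrative only; build with: cc -o decode_pagemap decode_pagemap.c */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	char path[64];
	uint64_t vaddr, ent;
	int fd;

	if (argc != 3)	/* usage: decode_pagemap <pid> <hex-vaddr> */
		return 1;
	vaddr = strtoull(argv[2], NULL, 16);
	snprintf(path, sizeof(path), "/proc/%s/pagemap", argv[1]);
	fd = open(path, O_RDONLY);
	if (fd < 0 || pread(fd, &ent, 8, (vaddr / 4096) * 8) != 8)
		return 1;

	printf("present=%d swap=%d file/shared-anon=%d exclusive=%d soft-dirty=%d\n",
	       !!(ent & (1ULL << 63)), !!(ent & (1ULL << 62)),
	       !!(ent & (1ULL << 61)), !!(ent & (1ULL << 56)),
	       !!(ent & (1ULL << 55)));
	/* PFN (bits 0-54) reads back as zero unless the opener had CAP_SYS_ADMIN */
	if (ent & (1ULL << 63))
		printf("pfn=0x%llx\n", (unsigned long long)(ent & ((1ULL << 55) - 1)));
	close(fd);
	return 0;
}
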
@@ -1227,42 +1250,37 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
 static ssize_t pagemap_read(struct file *file, char __user *buf,
                            size_t count, loff_t *ppos)
 {
-       struct task_struct *task = get_proc_task(file_inode(file));
-       struct mm_struct *mm;
+       struct mm_struct *mm = file->private_data;
        struct pagemapread pm;
-       int ret = -ESRCH;
        struct mm_walk pagemap_walk = {};
        unsigned long src;
        unsigned long svpfn;
        unsigned long start_vaddr;
        unsigned long end_vaddr;
-       int copied = 0;
+       int ret = 0, copied = 0;
 
-       if (!task)
+       if (!mm || !atomic_inc_not_zero(&mm->mm_users))
                goto out;
 
        ret = -EINVAL;
        /* file position must be aligned */
        if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
-               goto out_task;
+               goto out_mm;
 
        ret = 0;
        if (!count)
-               goto out_task;
+               goto out_mm;
+
+       /* do not disclose physical addresses: attack vector */
+       pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
 
-       pm.v2 = soft_dirty_cleared;
        pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
        pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY);
        ret = -ENOMEM;
        if (!pm.buffer)
-               goto out_task;
-
-       mm = mm_access(task, PTRACE_MODE_READ);
-       ret = PTR_ERR(mm);
-       if (!mm || IS_ERR(mm))
-               goto out_free;
+               goto out_mm;
 
-       pagemap_walk.pmd_entry = pagemap_pte_range;
+       pagemap_walk.pmd_entry = pagemap_pmd_range;
        pagemap_walk.pte_hole = pagemap_pte_hole;
 #ifdef CONFIG_HUGETLB_PAGE
        pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
@@ -1273,10 +1291,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
        src = *ppos;
        svpfn = src / PM_ENTRY_BYTES;
        start_vaddr = svpfn << PAGE_SHIFT;
-       end_vaddr = TASK_SIZE_OF(task);
+       end_vaddr = mm->task_size;
 
        /* watch out for wraparound */
-       if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
+       if (svpfn > mm->task_size >> PAGE_SHIFT)
                start_vaddr = end_vaddr;
 
        /*
@@ -1303,7 +1321,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
                len = min(count, PM_ENTRY_BYTES * pm.pos);
                if (copy_to_user(buf, pm.buffer, len)) {
                        ret = -EFAULT;
-                       goto out_mm;
+                       goto out_free;
                }
                copied += len;
                buf += len;
@@ -1313,24 +1331,31 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
        if (!ret || ret == PM_END_OF_BUFFER)
                ret = copied;
 
-out_mm:
-       mmput(mm);
 out_free:
        kfree(pm.buffer);
-out_task:
-       put_task_struct(task);
+out_mm:
+       mmput(mm);
 out:
        return ret;
 }
 
 static int pagemap_open(struct inode *inode, struct file *file)
 {
-       /* do not disclose physical addresses: attack vector */
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-       pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
-                       "to stop being page-shift some time soon. See the "
-                       "linux/Documentation/vm/pagemap.txt for details.\n");
+       struct mm_struct *mm;
+
+       mm = proc_mem_open(inode, PTRACE_MODE_READ);
+       if (IS_ERR(mm))
+               return PTR_ERR(mm);
+       file->private_data = mm;
+       return 0;
+}
+
+static int pagemap_release(struct inode *inode, struct file *file)
+{
+       struct mm_struct *mm = file->private_data;
+
+       if (mm)
+               mmdrop(mm);
        return 0;
 }
 
@@ -1338,6 +1363,7 @@ const struct file_operations proc_pagemap_operations = {
        .llseek         = mem_lseek, /* borrow this */
        .read           = pagemap_read,
        .open           = pagemap_open,
+       .release        = pagemap_release,
 };
 #endif /* CONFIG_PROC_PAGE_MONITOR */
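Taken together, the pagemap changes move the privilege check from open() to read(): any process that passes the PTRACE_MODE_READ check in proc_mem_open() may now open and read pagemap, but the PFN field is reported as zero unless the opening credentials carried CAP_SYS_ADMIN (pm.show_pfn via file_ns_capable()), closing the physical-address leak without breaking unprivileged consumers of the flag bits. Stashing the mm_struct in file->private_data (released with mmdrop()) keeps the descriptor valid across the target task's lifetime; pagemap_read() takes a transient reference with atomic_inc_not_zero(&mm->mm_users), so reads issued after the target has exited fail gracefully instead of touching a dead mm.
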
 
@@ -1447,18 +1473,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
                unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
+       pte_t huge_pte = huge_ptep_get(pte);
        struct numa_maps *md;
        struct page *page;
 
-       if (!pte_present(*pte))
+       if (!pte_present(huge_pte))
                return 0;
 
-       page = pte_page(*pte);
+       page = pte_page(huge_pte);
        if (!page)
                return 0;
 
        md = walk->private;
-       gather_stats(page, md, pte_dirty(*pte), 1);
+       gather_stats(page, md, pte_dirty(huge_pte), 1);
        return 0;
 }
 
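The gather_hugetlb_stats() fix matters on architectures where hugetlb entries are not stored in ordinary pte format: dereferencing the pte pointer directly can read a value that is not a valid pte there, while huge_ptep_get() lets the architecture (s390, for instance) translate its native huge-page representation into a regular pte_t before pte_present() and pte_dirty() inspect it.
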
@@ -1509,7 +1536,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 
        if (file) {
                seq_puts(m, " file=");
-               seq_path(m, &file->f_path, "\n\t= ");
+               seq_file_path(m, file, "\n\t= ");
        } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
                seq_puts(m, " heap");
        } else {