/*
- * Memory Migration functionality - linux/mm/migration.c
+ * Memory Migration functionality - linux/mm/migrate.c
*
* Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
*
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
-#include <linux/memcontrol.h>
+#include <linux/backing-dev.h>
#include <linux/syscalls.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
#include <asm/tlbflush.h>
else
page_add_file_rmap(new);
+ if (vma->vm_flags & VM_LOCKED)
+ mlock_vma_page(new);
+
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, ptep);
unlock:
struct buffer_head *head, enum migrate_mode mode,
int extra_count)
{
+ struct zone *oldzone, *newzone;
+ int dirty;
int expected_count = 1 + extra_count;
void **pslot;
/* Anonymous page without mapping */
if (page_count(page) != expected_count)
return -EAGAIN;
+
+ /* No turning back from here */
+ set_page_memcg(newpage, page_memcg(page));
+ newpage->index = page->index;
+ newpage->mapping = page->mapping;
+ if (PageSwapBacked(page))
+ SetPageSwapBacked(newpage);
+
return MIGRATEPAGE_SUCCESS;
}
+ oldzone = page_zone(page);
+ newzone = page_zone(newpage);
+
spin_lock_irq(&mapping->tree_lock);
pslot = radix_tree_lookup_slot(&mapping->page_tree,
}
/*
- * Now we know that no one else is looking at the page.
+ * Now we know that no one else is looking at the page:
+ * no turning back from here.
*/
+ set_page_memcg(newpage, page_memcg(page));
+ newpage->index = page->index;
+ newpage->mapping = page->mapping;
+ if (PageSwapBacked(page))
+ SetPageSwapBacked(newpage);
+
get_page(newpage); /* add cache reference */
if (PageSwapCache(page)) {
SetPageSwapCache(newpage);
set_page_private(newpage, page_private(page));
}
+ /* Move dirty while page refs frozen and newpage not yet exposed */
+ dirty = PageDirty(page);
+ if (dirty) {
+ ClearPageDirty(page);
+ SetPageDirty(newpage);
+ }
+
radix_tree_replace_slot(pslot, newpage);
/*
*/
page_unfreeze_refs(page, expected_count - 1);
+ spin_unlock(&mapping->tree_lock);
+ /* Leave irq disabled to prevent preemption while updating stats */
+
/*
* If moved to a different zone then also account
* the page for that zone. Other VM counters will be
* via NR_FILE_PAGES and NR_ANON_PAGES if they
* are mapped to swap space.
*/
- __dec_zone_page_state(page, NR_FILE_PAGES);
- __inc_zone_page_state(newpage, NR_FILE_PAGES);
- if (!PageSwapCache(page) && PageSwapBacked(page)) {
- __dec_zone_page_state(page, NR_SHMEM);
- __inc_zone_page_state(newpage, NR_SHMEM);
+ if (newzone != oldzone) {
+ __dec_zone_state(oldzone, NR_FILE_PAGES);
+ __inc_zone_state(newzone, NR_FILE_PAGES);
+ if (PageSwapBacked(page) && !PageSwapCache(page)) {
+ __dec_zone_state(oldzone, NR_SHMEM);
+ __inc_zone_state(newzone, NR_SHMEM);
+ }
+ if (dirty && mapping_cap_account_dirty(mapping)) {
+ __dec_zone_state(oldzone, NR_FILE_DIRTY);
+ __inc_zone_state(newzone, NR_FILE_DIRTY);
+ }
}
- spin_unlock_irq(&mapping->tree_lock);
+ local_irq_enable();
return MIGRATEPAGE_SUCCESS;
}
int expected_count;
void **pslot;
- if (!mapping) {
- if (page_count(page) != 1)
- return -EAGAIN;
- return MIGRATEPAGE_SUCCESS;
- }
-
spin_lock_irq(&mapping->tree_lock);
pslot = radix_tree_lookup_slot(&mapping->page_tree,
return -EAGAIN;
}
+ set_page_memcg(newpage, page_memcg(page));
+ newpage->index = page->index;
+ newpage->mapping = page->mapping;
get_page(newpage);
radix_tree_replace_slot(pslot, newpage);
if (PageMappedToDisk(page))
SetPageMappedToDisk(newpage);
- if (PageDirty(page)) {
- clear_page_dirty_for_io(page);
- /*
- * Want to mark the page and the radix tree as dirty, and
- * redo the accounting that clear_page_dirty_for_io undid,
- * but we can't use set_page_dirty because that function
- * is actually a signal that all of the page has become dirty.
- * Whereas only part of our page may be dirty.
- */
- if (PageSwapBacked(page))
- SetPageDirty(newpage);
- else
- __set_page_dirty_nobuffers(newpage);
- }
+ /* Move dirty on pages not done by migrate_page_move_mapping() */
+ if (PageDirty(page))
+ SetPageDirty(newpage);
+
+ if (page_is_young(page))
+ set_page_young(newpage);
+ if (page_is_idle(page))
+ set_page_idle(newpage);
/*
* Copy NUMA information to the new page, to prevent over-eager
cpupid = page_cpupid_xchg_last(page, -1);
page_cpupid_xchg_last(newpage, cpupid);
- mlock_migrate_page(newpage, page);
ksm_migrate_page(newpage, page);
/*
* Please do not reorder this without considering how mm/ksm.c's
* MIGRATEPAGE_SUCCESS - success
*/
static int move_to_new_page(struct page *newpage, struct page *page,
- int page_was_mapped, enum migrate_mode mode)
+ enum migrate_mode mode)
{
struct address_space *mapping;
int rc;
- /*
- * Block others from accessing the page when we get around to
- * establishing additional references. We are the only one
- * holding a reference to the new page at this point.
- */
- if (!trylock_page(newpage))
- BUG();
-
- /* Prepare mapping for the new page.*/
- newpage->index = page->index;
- newpage->mapping = page->mapping;
- if (PageSwapBacked(page))
- SetPageSwapBacked(newpage);
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
mapping = page_mapping(page);
if (!mapping)
* space which also has its own migratepage callback. This
* is the most common path for page migration.
*/
- rc = mapping->a_ops->migratepage(mapping,
- newpage, page, mode);
+ rc = mapping->a_ops->migratepage(mapping, newpage, page, mode);
else
rc = fallback_migrate_page(mapping, newpage, page, mode);
- if (rc != MIGRATEPAGE_SUCCESS) {
- newpage->mapping = NULL;
- } else {
- mem_cgroup_migrate(page, newpage, false);
- if (page_was_mapped)
- remove_migration_ptes(page, newpage);
- page->mapping = NULL;
+ /*
+ * When successful, old pagecache page->mapping must be cleared before
+ * page is freed; but stats require that PageAnon be left as PageAnon.
+ */
+ if (rc == MIGRATEPAGE_SUCCESS) {
+ set_page_memcg(page, NULL);
+ if (!PageAnon(page))
+ page->mapping = NULL;
}
-
- unlock_page(newpage);
-
return rc;
}
goto out_unlock;
wait_on_page_writeback(page);
}
+
/*
* By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
* we cannot notice that anon_vma is freed while we migrates a page.
* of migration. File cache pages are no problem because of page_lock()
* File Caches may use write_page() or lock_page() in migration, then,
* just care Anon page here.
+ *
+ * Only page_get_anon_vma() understands the subtleties of
+ * getting a hold on an anon_vma from outside one of its mms.
+ * But if we cannot get anon_vma, then we won't need it anyway,
+ * because that implies that the anon page is no longer mapped
+ * (and cannot be remapped so long as we hold the page lock).
*/
- if (PageAnon(page) && !PageKsm(page)) {
- /*
- * Only page_lock_anon_vma_read() understands the subtleties of
- * getting a hold on an anon_vma from outside one of its mms.
- */
+ if (PageAnon(page) && !PageKsm(page))
anon_vma = page_get_anon_vma(page);
- if (anon_vma) {
- /*
- * Anon page
- */
- } else if (PageSwapCache(page)) {
- /*
- * We cannot be sure that the anon_vma of an unmapped
- * swapcache page is safe to use because we don't
- * know in advance if the VMA that this page belonged
- * to still exists. If the VMA and others sharing the
- * data have been freed, then the anon_vma could
- * already be invalid.
- *
- * To avoid this possibility, swapcache pages get
- * migrated but are not remapped when migration
- * completes
- */
- } else {
- goto out_unlock;
- }
- }
+
+ /*
+ * Block others from accessing the new page when we get around to
+ * establishing additional references. We are usually the only one
+ * holding a reference to newpage at this point. We used to have a BUG
+ * here if trylock_page(newpage) fails, but would like to allow for
+ * cases where there might be a race with the previous use of newpage.
+ * This is much like races on refcount of oldpage: just don't BUG().
+ */
+ if (unlikely(!trylock_page(newpage)))
+ goto out_unlock;
if (unlikely(isolated_balloon_page(page))) {
/*
* the page migration right away (proteced by page lock).
*/
rc = balloon_page_migrate(newpage, page, mode);
- goto out_unlock;
+ goto out_unlock_both;
}
/*
VM_BUG_ON_PAGE(PageAnon(page), page);
if (page_has_private(page)) {
try_to_free_buffers(page);
- goto out_unlock;
+ goto out_unlock_both;
}
- goto skip_unmap;
- }
-
- /* Establish migration ptes or remove ptes */
- if (page_mapped(page)) {
+ } else if (page_mapped(page)) {
+ /* Establish migration ptes */
+ VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
+ page);
try_to_unmap(page,
TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
page_was_mapped = 1;
}
-skip_unmap:
if (!page_mapped(page))
- rc = move_to_new_page(newpage, page, page_was_mapped, mode);
+ rc = move_to_new_page(newpage, page, mode);
- if (rc && page_was_mapped)
- remove_migration_ptes(page, page);
+ if (page_was_mapped)
+ remove_migration_ptes(page,
+ rc == MIGRATEPAGE_SUCCESS ? newpage : page);
+out_unlock_both:
+ unlock_page(newpage);
+out_unlock:
/* Drop an anon_vma reference if we took one */
if (anon_vma)
put_anon_vma(anon_vma);
-
-out_unlock:
unlock_page(page);
out:
return rc;
static ICE_noinline int unmap_and_move(new_page_t get_new_page,
free_page_t put_new_page,
unsigned long private, struct page *page,
- int force, enum migrate_mode mode)
+ int force, enum migrate_mode mode,
+ enum migrate_reason reason)
{
- int rc = 0;
+ int rc = MIGRATEPAGE_SUCCESS;
int *result = NULL;
- struct page *newpage = get_new_page(page, private, &result);
+ struct page *newpage;
+ newpage = get_new_page(page, private, &result);
if (!newpage)
return -ENOMEM;
goto out;
rc = __unmap_and_move(page, newpage, force, mode);
+ if (rc == MIGRATEPAGE_SUCCESS)
+ put_new_page = NULL;
out:
if (rc != -EAGAIN) {
list_del(&page->lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
- putback_lru_page(page);
+ /* Soft-offlined page shouldn't go through lru cache list */
+ if (reason == MR_MEMORY_FAILURE) {
+ put_page(page);
+ if (!test_set_page_hwpoison(page))
+ num_poisoned_pages_inc();
+ } else
+ putback_lru_page(page);
}
/*
* it. Otherwise, putback_lru_page() will drop the reference grabbed
* during isolation.
*/
- if (rc != MIGRATEPAGE_SUCCESS && put_new_page) {
- ClearPageSwapBacked(newpage);
+ if (put_new_page)
put_new_page(newpage, private);
- } else if (unlikely(__is_movable_balloon_page(newpage))) {
+ else if (unlikely(__is_movable_balloon_page(newpage))) {
/* drop our reference, page already in the balloon */
put_page(newpage);
} else
struct page *hpage, int force,
enum migrate_mode mode)
{
- int rc = 0;
+ int rc = -EAGAIN;
int *result = NULL;
int page_was_mapped = 0;
struct page *new_hpage;
if (!new_hpage)
return -ENOMEM;
- rc = -EAGAIN;
-
if (!trylock_page(hpage)) {
if (!force || mode != MIGRATE_SYNC)
goto out;
if (PageAnon(hpage))
anon_vma = page_get_anon_vma(hpage);
+ if (unlikely(!trylock_page(new_hpage)))
+ goto put_anon;
+
if (page_mapped(hpage)) {
try_to_unmap(hpage,
TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
}
if (!page_mapped(hpage))
- rc = move_to_new_page(new_hpage, hpage, page_was_mapped, mode);
+ rc = move_to_new_page(new_hpage, hpage, mode);
- if (rc != MIGRATEPAGE_SUCCESS && page_was_mapped)
- remove_migration_ptes(hpage, hpage);
+ if (page_was_mapped)
+ remove_migration_ptes(hpage,
+ rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage);
+ unlock_page(new_hpage);
+
+put_anon:
if (anon_vma)
put_anon_vma(anon_vma);
- if (rc == MIGRATEPAGE_SUCCESS)
+ if (rc == MIGRATEPAGE_SUCCESS) {
hugetlb_cgroup_migrate(hpage, new_hpage);
+ put_new_page = NULL;
+ }
unlock_page(hpage);
out:
* it. Otherwise, put_page() will drop the reference grabbed during
* isolation.
*/
- if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
+ if (put_new_page)
put_new_page(new_hpage, private);
else
- put_page(new_hpage);
+ putback_active_hugepage(new_hpage);
if (result) {
if (rc)
*
* The function returns after 10 attempts or if no pages are movable any more
* because the list has become empty or no retryable pages exist any more.
- * The caller should call putback_lru_pages() to return pages to the LRU
+ * The caller should call putback_movable_pages() to return pages to the LRU
* or free list only if ret != 0.
*
* Returns the number of pages that were not migrated, or an error code.
pass > 2, mode);
else
rc = unmap_and_move(get_new_page, put_new_page,
- private, page, pass > 2, mode);
+ private, page, pass > 2, mode,
+ reason);
switch(rc) {
case -ENOMEM:
}
}
}
- rc = nr_failed + retry;
+ nr_failed += retry;
+ rc = nr_failed;
out:
if (nr_succeeded)
count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
return alloc_huge_page_node(page_hstate(compound_head(p)),
pm->node);
else
- return alloc_pages_exact_node(pm->node,
+ return __alloc_pages_node(pm->node,
GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
}
if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
goto set_status;
- page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT);
+ /* FOLL_DUMP to ignore special (like zero) pages */
+ page = follow_page(vma, pp->addr,
+ FOLL_GET | FOLL_SPLIT | FOLL_DUMP);
err = PTR_ERR(page);
if (IS_ERR(page))
if (!page)
goto set_status;
- /* Use PageReserved to check for zero page */
- if (PageReserved(page))
- goto put_and_set;
-
pp->page = page;
err = page_to_nid(page);
if (!vma || addr < vma->vm_start)
goto set_status;
- page = follow_page(vma, addr, 0);
+ /* FOLL_DUMP to ignore special (like zero) pages */
+ page = follow_page(vma, addr, FOLL_DUMP);
err = PTR_ERR(page);
if (IS_ERR(page))
goto set_status;
- err = -ENOENT;
- /* Use PageReserved to check for zero page */
- if (!page || PageReserved(page))
- goto set_status;
-
- err = page_to_nid(page);
+ err = page ? page_to_nid(page) : -ENOENT;
set_status:
*status = err;
int nid = (int) data;
struct page *newpage;
- newpage = alloc_pages_exact_node(nid,
+ newpage = __alloc_pages_node(nid,
(GFP_HIGHUSER_MOVABLE |
__GFP_THISNODE | __GFP_NOMEMALLOC |
__GFP_NORETRY | __GFP_NOWARN) &
- ~GFP_IOFS, 0);
+ ~__GFP_RECLAIM, 0);
return newpage;
}
goto out_dropref;
new_page = alloc_pages_node(node,
- (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
+ (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
HPAGE_PMD_ORDER);
if (!new_page)
goto out_fail;
SetPageActive(page);
if (TestClearPageUnevictable(new_page))
SetPageUnevictable(page);
- mlock_migrate_page(page, new_page);
unlock_page(new_page);
put_page(new_page); /* Free it */
*/
flush_cache_range(vma, mmun_start, mmun_end);
page_add_anon_rmap(new_page, vma, mmun_start);
- pmdp_clear_flush_notify(vma, mmun_start, pmd);
+ pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
set_pmd_at(mm, mmun_start, pmd, entry);
flush_tlb_range(vma, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
goto fail_putback;
}
- mem_cgroup_migrate(page, new_page, false);
-
+ mlock_migrate_page(new_page, page);
+ set_page_memcg(new_page, page_memcg(page));
+ set_page_memcg(page, NULL);
page_remove_rmap(page);
spin_unlock(ptl);