These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
[kvmfornfv.git] kernel/mm/compaction.c
index 0af17fe..ba0f146 100644
@@ -35,17 +35,6 @@ static inline void count_compact_events(enum vm_event_item item, long delta)
 #endif
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
-#ifdef CONFIG_TRACEPOINTS
-static const char *const compaction_status_string[] = {
-       "deferred",
-       "skipped",
-       "continue",
-       "partial",
-       "complete",
-       "no_suitable_page",
-       "not_suitable_zone",
-};
-#endif
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/compaction.h>
@@ -207,6 +196,13 @@ static inline bool isolation_suitable(struct compact_control *cc,
        return !get_pageblock_skip(page);
 }
 
+static void reset_cached_positions(struct zone *zone)
+{
+       zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
+       zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
+       zone->compact_cached_free_pfn = zone_end_pfn(zone);
+}
+
 /*
  * This function is called to clear all cached information on pageblocks that
  * should be skipped for page isolation when the migrate and free page scanner
@@ -218,9 +214,6 @@ static void __reset_isolation_suitable(struct zone *zone)
        unsigned long end_pfn = zone_end_pfn(zone);
        unsigned long pfn;
 
-       zone->compact_cached_migrate_pfn[0] = start_pfn;
-       zone->compact_cached_migrate_pfn[1] = start_pfn;
-       zone->compact_cached_free_pfn = end_pfn;
        zone->compact_blockskip_flush = false;
 
        /* Walk the zone and mark every pageblock as suitable for isolation */
@@ -238,6 +231,8 @@ static void __reset_isolation_suitable(struct zone *zone)
 
                clear_pageblock_skip(page);
        }
+
+       reset_cached_positions(zone);
 }
 
 void reset_isolation_suitable(pg_data_t *pgdat)
@@ -431,6 +426,24 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 
                if (!valid_page)
                        valid_page = page;
+
+               /*
+                * For compound pages such as THP and hugetlbfs, we can save
+                * potentially a lot of iterations if we skip them at once.
+                * The check is racy, but we can consider only valid values
+                * and the only danger is skipping too much.
+                */
+               if (PageCompound(page)) {
+                       unsigned int comp_order = compound_order(page);
+
+                       if (likely(comp_order < MAX_ORDER)) {
+                               blockpfn += (1UL << comp_order) - 1;
+                               cursor += (1UL << comp_order) - 1;
+                       }
+
+                       goto isolate_fail;
+               }
+
                if (!PageBuddy(page))
                        goto isolate_fail;
 
@@ -490,6 +503,13 @@ isolate_fail:
 
        }
 
+       /*
+        * There is a tiny chance that we have read bogus compound_order(),
+        * so be careful to not go outside of the pageblock.
+        */
+       if (unlikely(blockpfn > end_pfn))
+               blockpfn = end_pfn;
+
        trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn,
                                        nr_scanned, total_isolated);
 
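A minimal userspace sketch of the skip-and-clamp arithmetic the two hunks above introduce for the free scanner: a compound page is stepped over in one go using its order, and because compound_order() is read without locking and can briefly return a stale value, the scan position is clamped back to the pageblock end afterwards. skip_compound(), the MAX_ORDER value and the pfn numbers below are illustrative stand-ins, not the kernel's definitions.

    #include <stdio.h>

    #define MAX_ORDER 11    /* illustrative; the kernel value is config-dependent */

    /* Advance a scan position past a compound page of the given order. */
    static unsigned long skip_compound(unsigned long pfn, unsigned int order)
    {
            if (order < MAX_ORDER)              /* ignore a bogus racy read */
                    pfn += (1UL << order) - 1;  /* the loop's pfn++ adds the final +1 */
            return pfn;
    }

    int main(void)
    {
            unsigned long pfn = 1024, block_end = 1536;     /* one 512-page pageblock */

            pfn = skip_compound(pfn, 9) + 1;    /* jump over a 512-page THP */
            if (pfn > block_end)                /* clamp: a stale order may overshoot */
                    pfn = block_end;
            printf("resume free scan at pfn %lu\n", pfn);
            return 0;
    }
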
@@ -674,6 +694,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
        /* Time to isolate some pages for migration */
        for (; low_pfn < end_pfn; low_pfn++) {
+               bool is_lru;
+
                /*
                 * Periodically drop the lock (if held) regardless of its
                 * contention, to give chance to IRQs. Abort async compaction
@@ -717,36 +739,35 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                 * It's possible to migrate LRU pages and balloon pages
                 * Skip any other type of page
                 */
-               if (!PageLRU(page)) {
+               is_lru = PageLRU(page);
+               if (!is_lru) {
                        if (unlikely(balloon_page_movable(page))) {
                                if (balloon_page_isolate(page)) {
                                        /* Successfully isolated */
                                        goto isolate_success;
                                }
                        }
-                       continue;
                }
 
                /*
-                * PageLRU is set. lru_lock normally excludes isolation
-                * splitting and collapsing (collapsing has already happened
-                * if PageLRU is set) but the lock is not necessarily taken
-                * here and it is wasteful to take it just to check transhuge.
-                * Check TransHuge without lock and skip the whole pageblock if
-                * it's either a transhuge or hugetlbfs page, as calling
-                * compound_order() without preventing THP from splitting the
-                * page underneath us may return surprising results.
+                * Regardless of being on LRU, compound pages such as THP and
+                * hugetlbfs are not to be compacted. We can potentially save
+                * a lot of iterations if we skip them at once. The check is
+                * racy, but we can consider only valid values and the only
+                * danger is skipping too much.
                 */
-               if (PageTransHuge(page)) {
-                       if (!locked)
-                               low_pfn = ALIGN(low_pfn + 1,
-                                               pageblock_nr_pages) - 1;
-                       else
-                               low_pfn += (1 << compound_order(page)) - 1;
+               if (PageCompound(page)) {
+                       unsigned int comp_order = compound_order(page);
+
+                       if (likely(comp_order < MAX_ORDER))
+                               low_pfn += (1UL << comp_order) - 1;
 
                        continue;
                }
 
+               if (!is_lru)
+                       continue;
+
                /*
                 * Migration will fail if an anonymous page is pinned in memory,
                 * so avoid taking lru_lock and isolating it unnecessarily in an
@@ -763,11 +784,17 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                        if (!locked)
                                break;
 
-                       /* Recheck PageLRU and PageTransHuge under lock */
+                       /* Recheck PageLRU and PageCompound under lock */
                        if (!PageLRU(page))
                                continue;
-                       if (PageTransHuge(page)) {
-                               low_pfn += (1 << compound_order(page)) - 1;
+
+                       /*
+                        * Page became compound since the non-locked check,
+                        * and it's on LRU. It can only be a THP so the order
+                        * is safe to read and it's 0 for tail pages.
+                        */
+                       if (unlikely(PageCompound(page))) {
+                               low_pfn += (1UL << compound_order(page)) - 1;
                                continue;
                        }
                }
@@ -778,7 +805,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                if (__isolate_lru_page(page, isolate_mode) != 0)
                        continue;
 
-               VM_BUG_ON_PAGE(PageTransCompound(page), page);
+               VM_BUG_ON_PAGE(PageCompound(page), page);
 
                /* Successfully isolated */
                del_page_from_lru_list(page, lruvec, page_lru(page));
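
The hunk above keeps the cheap unlocked PageLRU()/PageCompound() tests and repeats them once lru_lock is actually taken, because the page can change under the scanner between the two checks. The shape of that check/lock/recheck pattern, reduced to a userspace sketch (the pthread mutex and the page_compound flag are stand-ins for the kernel's locking, assumed for illustration only):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool page_compound;          /* stands in for PageCompound(page) */

    /* Optimistic unlocked test first, then recheck with the lock held. */
    static bool try_isolate(void)
    {
            if (page_compound)              /* cheap check; the result may be stale */
                    return false;

            pthread_mutex_lock(&lru_lock);
            if (page_compound) {            /* recheck under the lock */
                    pthread_mutex_unlock(&lru_lock);
                    return false;
            }
            /* ... isolation work done while the lock is held ... */
            pthread_mutex_unlock(&lru_lock);
            return true;
    }

    int main(void)
    {
            printf("isolated: %d\n", try_isolate());
            return 0;
    }
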
@@ -897,6 +924,16 @@ static bool suitable_migration_target(struct page *page)
        return false;
 }
 
+/*
+ * Test whether the free scanner has reached the same or lower pageblock than
+ * the migration scanner, and compaction should thus terminate.
+ */
+static inline bool compact_scanners_met(struct compact_control *cc)
+{
+       return (cc->free_pfn >> pageblock_order)
+               <= (cc->migrate_pfn >> pageblock_order);
+}
+
 /*
  * Based on information in the current compact_control, find blocks
  * suitable for isolating free pages from and then isolate them.
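
compact_scanners_met(), added in the hunk above, compares the two scanners at pageblock granularity rather than by raw pfn, so compaction also terminates once both scanners end up inside the same pageblock. A small worked example in userspace C (the PAGEBLOCK_ORDER of 9, i.e. 512-page blocks, is an assumed value for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    #define PAGEBLOCK_ORDER 9   /* assumed: 512 pages per pageblock */

    static bool scanners_met(unsigned long free_pfn, unsigned long migrate_pfn)
    {
            return (free_pfn >> PAGEBLOCK_ORDER) <= (migrate_pfn >> PAGEBLOCK_ORDER);
    }

    int main(void)
    {
            /* Different pfns, same pageblock (block 4): the scanners have met. */
            printf("%d\n", scanners_met(2304, 2100));
            /* Free scanner still a full pageblock ahead: keep compacting. */
            printf("%d\n", scanners_met(2560, 2100));
            return 0;
    }
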
@@ -933,8 +970,7 @@ static void isolate_freepages(struct compact_control *cc)
         * pages on cc->migratepages. We stop searching if the migrate
         * and free page scanners meet or enough free pages are isolated.
         */
-       for (; block_start_pfn >= low_pfn &&
-                       cc->nr_migratepages > cc->nr_freepages;
+       for (; block_start_pfn >= low_pfn;
                                block_end_pfn = block_start_pfn,
                                block_start_pfn -= pageblock_nr_pages,
                                isolate_start_pfn = block_start_pfn) {
@@ -966,6 +1002,8 @@ static void isolate_freepages(struct compact_control *cc)
                                        block_end_pfn, freelist, false);
 
                /*
+                * If we isolated enough freepages, or aborted due to async
+                * compaction being contended, terminate the loop.
                 * Remember where the free scanner should restart next time,
                 * which is where isolate_freepages_block() left off.
                 * But if it scanned the whole pageblock, isolate_start_pfn
@@ -974,27 +1012,31 @@ static void isolate_freepages(struct compact_control *cc)
                 * In that case we will however want to restart at the start
                 * of the previous pageblock.
                 */
-               cc->free_pfn = (isolate_start_pfn < block_end_pfn) ?
-                               isolate_start_pfn :
-                               block_start_pfn - pageblock_nr_pages;
-
-               /*
-                * isolate_freepages_block() might have aborted due to async
-                * compaction being contended
-                */
-               if (cc->contended)
+               if ((cc->nr_freepages >= cc->nr_migratepages)
+                                                       || cc->contended) {
+                       if (isolate_start_pfn >= block_end_pfn)
+                               isolate_start_pfn =
+                                       block_start_pfn - pageblock_nr_pages;
                        break;
+               } else {
+                       /*
+                        * isolate_freepages_block() should not terminate
+                        * prematurely unless contended, or isolated enough
+                        */
+                       VM_BUG_ON(isolate_start_pfn < block_end_pfn);
+               }
        }
 
        /* split_free_page does not map the pages */
        map_pages(freelist);
 
        /*
-        * If we crossed the migrate scanner, we want to keep it that way
-        * so that compact_finished() may detect this
+        * Record where the free scanner will restart next time. Either we
+        * broke from the loop and set isolate_start_pfn based on the last
+        * call to isolate_freepages_block(), or we met the migration scanner
+        * and the loop terminated due to isolate_start_pfn < low_pfn
         */
-       if (block_start_pfn < low_pfn)
-               cc->free_pfn = cc->migrate_pfn;
+       cc->free_pfn = isolate_start_pfn;
 }
 
 /*
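
The reworked loop above terminates as soon as enough free pages are isolated (or compaction is contended) and records where the free scanner should resume: if isolate_freepages_block() stopped inside the pageblock, resume exactly there; if the block was scanned to its end, resume at the start of the next lower pageblock. A simplified model of that bookkeeping (next_free_pfn() and the 512-page block size are illustrative, not kernel names):

    #include <stdio.h>

    #define PAGEBLOCK_NR_PAGES 512UL    /* assumed pageblock size for the example */

    /*
     * Where should the free scanner resume next time?  A partially scanned
     * block: resume where isolate_freepages_block() stopped.  A fully
     * scanned block: move on to the previous (lower) pageblock.
     */
    static unsigned long next_free_pfn(unsigned long isolate_start_pfn,
                                       unsigned long block_start_pfn,
                                       unsigned long block_end_pfn)
    {
            if (isolate_start_pfn >= block_end_pfn)
                    return block_start_pfn - PAGEBLOCK_NR_PAGES;
            return isolate_start_pfn;
    }

    int main(void)
    {
            /* Stopped mid-block at pfn 10300: resume right there. */
            printf("%lu\n", next_free_pfn(10300, 10240, 10752));
            /* Scanned the whole block 10240..10751: resume one block lower. */
            printf("%lu\n", next_free_pfn(10752, 10240, 10752));
            return 0;
    }
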
@@ -1062,6 +1104,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
                                        struct compact_control *cc)
 {
        unsigned long low_pfn, end_pfn;
+       unsigned long isolate_start_pfn;
        struct page *page;
        const isolate_mode_t isolate_mode =
                (sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
@@ -1110,6 +1153,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
                        continue;
 
                /* Perform the isolation */
+               isolate_start_pfn = low_pfn;
                low_pfn = isolate_migratepages_block(cc, low_pfn, end_pfn,
                                                                isolate_mode);
 
@@ -1118,6 +1162,15 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
                        return ISOLATE_ABORT;
                }
 
+               /*
+                * Record where we could have freed pages by migration and not
+                * yet flushed them to the buddy allocator; this is the lowest
+                * page that could have been isolated and then freed by
+                * migration.
+                */
+               if (cc->nr_migratepages && !cc->last_migrated_pfn)
+                       cc->last_migrated_pfn = isolate_start_pfn;
+
                /*
                 * Either we isolated something and proceed with migration. Or
                 * we failed and compact_zone should decide if we should
@@ -1127,16 +1180,21 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
        }
 
        acct_isolated(zone, cc);
-       /*
-        * Record where migration scanner will be restarted. If we end up in
-        * the same pageblock as the free scanner, make the scanners fully
-        * meet so that compact_finished() terminates compaction.
-        */
-       cc->migrate_pfn = (end_pfn <= cc->free_pfn) ? low_pfn : cc->free_pfn;
+       /* Record where migration scanner will be restarted. */
+       cc->migrate_pfn = low_pfn;
 
        return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
 }
 
+/*
+ * order == -1 is expected when compacting via
+ * /proc/sys/vm/compact_memory
+ */
+static inline bool is_via_compact_memory(int order)
+{
+       return order == -1;
+}
+
 static int __compact_finished(struct zone *zone, struct compact_control *cc,
                            const int migratetype)
 {
@@ -1144,14 +1202,12 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc,
        unsigned long watermark;
 
        if (cc->contended || fatal_signal_pending(current))
-               return COMPACT_PARTIAL;
+               return COMPACT_CONTENDED;
 
        /* Compaction run completes if the migrate and free scanner meet */
-       if (cc->free_pfn <= cc->migrate_pfn) {
+       if (compact_scanners_met(cc)) {
                /* Let the next compaction start anew. */
-               zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
-               zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
-               zone->compact_cached_free_pfn = zone_end_pfn(zone);
+               reset_cached_positions(zone);
 
                /*
                 * Mark that the PG_migrate_skip information should be cleared
@@ -1165,11 +1221,7 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc,
                return COMPACT_COMPLETE;
        }
 
-       /*
-        * order == -1 is expected when compacting via
-        * /proc/sys/vm/compact_memory
-        */
-       if (cc->order == -1)
+       if (is_via_compact_memory(cc->order))
                return COMPACT_CONTINUE;
 
        /* Compaction run is not finished if the watermark is not met */
@@ -1232,11 +1284,7 @@ static unsigned long __compaction_suitable(struct zone *zone, int order,
        int fragindex;
        unsigned long watermark;
 
-       /*
-        * order == -1 is expected when compacting via
-        * /proc/sys/vm/compact_memory
-        */
-       if (order == -1)
+       if (is_via_compact_memory(order))
                return COMPACT_CONTINUE;
 
        watermark = low_wmark_pages(zone);
@@ -1295,7 +1343,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
        unsigned long end_pfn = zone_end_pfn(zone);
        const int migratetype = gfpflags_to_migratetype(cc->gfp_mask);
        const bool sync = cc->mode != MIGRATE_ASYNC;
-       unsigned long last_migrated_pfn = 0;
 
        ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
                                                        cc->classzone_idx);
@@ -1333,6 +1380,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
                zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
                zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
        }
+       cc->last_migrated_pfn = 0;
 
        trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
                                cc->free_pfn, end_pfn, sync);
@@ -1342,11 +1390,10 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
        while ((ret = compact_finished(zone, cc, migratetype)) ==
                                                COMPACT_CONTINUE) {
                int err;
-               unsigned long isolate_start_pfn = cc->migrate_pfn;
 
                switch (isolate_migratepages(zone, cc)) {
                case ISOLATE_ABORT:
-                       ret = COMPACT_PARTIAL;
+                       ret = COMPACT_CONTENDED;
                        putback_movable_pages(&cc->migratepages);
                        cc->nr_migratepages = 0;
                        goto out;
@@ -1376,22 +1423,12 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
                         * migrate_pages() may return -ENOMEM when scanners meet
                         * and we want compact_finished() to detect it
                         */
-                       if (err == -ENOMEM && cc->free_pfn > cc->migrate_pfn) {
-                               ret = COMPACT_PARTIAL;
+                       if (err == -ENOMEM && !compact_scanners_met(cc)) {
+                               ret = COMPACT_CONTENDED;
                                goto out;
                        }
                }
 
-               /*
-                * Record where we could have freed pages by migration and not
-                * yet flushed them to buddy allocator. We use the pfn that
-                * isolate_migratepages() started from in this loop iteration
-                * - this is the lowest page that could have been isolated and
-                * then freed by migration.
-                */
-               if (!last_migrated_pfn)
-                       last_migrated_pfn = isolate_start_pfn;
-
 check_drain:
                /*
                 * Has the migration scanner moved away from the previous
@@ -1400,12 +1437,12 @@ check_drain:
                 * compact_finished() can detect immediately if allocation
                 * would succeed.
                 */
-               if (cc->order > 0 && last_migrated_pfn) {
+               if (cc->order > 0 && cc->last_migrated_pfn) {
                        int cpu;
                        unsigned long current_block_start =
                                cc->migrate_pfn & ~((1UL << cc->order) - 1);
 
-                       if (last_migrated_pfn < current_block_start) {
+                       if (cc->last_migrated_pfn < current_block_start) {
                                cpu = get_cpu_light();
                                local_lock_irq(swapvec_lock);
                                lru_add_drain_cpu(cpu);
@@ -1413,7 +1450,7 @@ check_drain:
                                drain_local_pages(zone);
                                put_cpu_light();
                                /* No more flushing until we migrate again */
-                               last_migrated_pfn = 0;
+                               cc->last_migrated_pfn = 0;
                        }
                }
 
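With last_migrated_pfn now carried in struct compact_control (set in isolate_migratepages above, consumed here), the drain still happens only once the migration scanner has completely left the order-aligned block where pages were last freed by migration, so those pages can leave the per-cpu lists and merge in the buddy allocator before compact_finished() checks for a suitable free page. A stripped-down userspace version of just the trigger condition (should_drain() and the pfn values are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    /*
     * Drain once the migration scanner has left the order-aligned block
     * where pages were last freed by migration.
     */
    static bool should_drain(unsigned long migrate_pfn,
                             unsigned long last_migrated_pfn, int order)
    {
            unsigned long current_block_start =
                    migrate_pfn & ~((1UL << order) - 1);

            return last_migrated_pfn && last_migrated_pfn < current_block_start;
    }

    int main(void)
    {
            /* Still inside the same order-9 block as the last migration: wait. */
            printf("%d\n", should_drain(2200, 2100, 9));
            /* Scanner has moved past that block: time to drain. */
            printf("%d\n", should_drain(2600, 2100, 9));
            return 0;
    }
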
@@ -1442,6 +1479,9 @@ out:
        trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
                                cc->free_pfn, end_pfn, sync, ret);
 
+       if (ret == COMPACT_CONTENDED)
+               ret = COMPACT_PARTIAL;
+
        return ret;
 }
 
@@ -1613,10 +1653,11 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
                 * this makes sure we compact the whole zone regardless of
                 * cached scanner positions.
                 */
-               if (cc->order == -1)
+               if (is_via_compact_memory(cc->order))
                        __reset_isolation_suitable(zone);
 
-               if (cc->order == -1 || !compaction_deferred(zone, cc->order))
+               if (is_via_compact_memory(cc->order) ||
+                               !compaction_deferred(zone, cc->order))
                        compact_zone(zone, cc);
 
                if (cc->order > 0) {