Upgrade to 4.4.50-rt62
diff --git a/kernel/mm/hugetlb.c b/kernel/mm/hugetlb.c
index ef6963b..ea11123 100644
@@ -1416,12 +1416,13 @@ static void dissolve_free_huge_page(struct page *page)
 {
        spin_lock(&hugetlb_lock);
        if (PageHuge(page) && !page_count(page)) {
-               struct hstate *h = page_hstate(page);
-               int nid = page_to_nid(page);
-               list_del(&page->lru);
+               struct page *head = compound_head(page);
+               struct hstate *h = page_hstate(head);
+               int nid = page_to_nid(head);
+               list_del(&head->lru);
                h->free_huge_pages--;
                h->free_huge_pages_node[nid]--;
-               update_and_free_page(h, page);
+               update_and_free_page(h, head);
        }
        spin_unlock(&hugetlb_lock);
 }
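
Why the head page matters here (a sketch, not part of the patch): the pfn that
reaches dissolve_free_huge_page() can point into the middle of a free gigantic
page, and compound_head() maps any such tail page back to the head page, which
is the page the hstate and per-node bookkeeping has to operate on. The helper
below is hypothetical and only illustrates that relationship:

    /* Hypothetical sketch: resolve a possibly-tail pfn to its head page
     * before looking at any hstate bookkeeping. */
    static void sketch_report_free_huge_page(unsigned long pfn)
    {
            struct page *page = pfn_to_page(pfn);
            struct page *head = compound_head(page);  /* == page for a head page */

            if (PageHuge(head) && !page_count(head))
                    pr_debug("free huge page of order %u on node %d\n",
                             huge_page_order(page_hstate(head)),
                             page_to_nid(head));
    }
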
@@ -1429,7 +1430,8 @@ static void dissolve_free_huge_page(struct page *page)
 /*
  * Dissolve free hugepages in a given pfn range. Used by memory hotplug to
  * make specified memory blocks removable from the system.
- * Note that start_pfn should aligned with (minimum) hugepage size.
+ * Note that this will dissolve a free gigantic hugepage completely, if any
+ * part of it lies within the given range.
  */
 void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 {
@@ -1438,7 +1440,6 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
        if (!hugepages_supported())
                return;
 
-       VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << minimum_order));
        for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order)
                dissolve_free_huge_page(pfn_to_page(pfn));
 }
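
Dropping the VM_BUG_ON is a consequence of the change above: memory offlining
passes ranges that are only guaranteed to be memory-section aligned, not
aligned to a (gigantic) huge page, and dissolve_free_huge_page() can now cope
with a pfn that lands inside a gigantic page. A rough caller sketch, with the
section number variable being hypothetical:

    /* Hypothetical caller sketch: the offlined range is section aligned,
     * which may start in the middle of a free gigantic page. */
    unsigned long start_pfn = section_nr_to_pfn(section_nr);
    unsigned long end_pfn = start_pfn + PAGES_PER_SECTION;

    dissolve_free_huge_pages(start_pfn, end_pfn);
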
@@ -1722,23 +1723,32 @@ free:
 }
 
 /*
- * When releasing a hugetlb pool reservation, any surplus pages that were
- * allocated to satisfy the reservation must be explicitly freed if they were
- * never used.
- * Called with hugetlb_lock held.
+ * This routine has two main purposes:
+ * 1) Decrement the reservation count (resv_huge_pages) by the value passed
+ *    in unused_resv_pages.  This corresponds to the prior adjustments made
+ *    to the associated reservation map.
+ * 2) Free any unused surplus pages that may have been allocated to satisfy
+ *    the reservation.  As many as unused_resv_pages may be freed.
+ *
+ * Called with hugetlb_lock held.  However, the lock could be dropped (and
+ * reacquired) during calls to cond_resched_lock.  Whenever dropping the lock,
+ * we must make sure nobody else can claim pages we are in the process of
+ * freeing.  Do this by ensuring resv_huge_pages is always greater than the
+ * number of huge pages we plan to free when dropping the lock.
  */
 static void return_unused_surplus_pages(struct hstate *h,
                                        unsigned long unused_resv_pages)
 {
        unsigned long nr_pages;
 
-       /* Uncommit the reservation */
-       h->resv_huge_pages -= unused_resv_pages;
-
        /* Cannot return gigantic pages currently */
        if (hstate_is_gigantic(h))
-               return;
+               goto out;
 
+       /*
+        * Part (or even all) of the reservation could have been backed
+        * by pre-allocated pages. Only free surplus pages.
+        */
        nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
 
        /*
@@ -1748,12 +1758,22 @@ static void return_unused_surplus_pages(struct hstate *h,
         * when the nodes with surplus pages have no free pages.
        * free_pool_huge_page() will balance the freed pages across the
         * on-line nodes with memory and will handle the hstate accounting.
+        *
+        * Note that we decrement resv_huge_pages as we free the pages.  If
+        * we drop the lock, resv_huge_pages will still be sufficiently large
+        * to cover subsequent pages we may free.
         */
        while (nr_pages--) {
+               h->resv_huge_pages--;
+               unused_resv_pages--;
                if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
-                       break;
+                       goto out;
                cond_resched_lock(&hugetlb_lock);
        }
+
+out:
+       /* Fully uncommit the reservation */
+       h->resv_huge_pages -= unused_resv_pages;
 }
 
 
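
The lock drop that the new comment is defending against comes from
cond_resched_lock(). A simplified sketch of its behaviour (assuming the usual
definition in kernel/sched/core.c) shows why resv_huge_pages has to stay large
enough before every iteration instead of being uncommitted up front:

    /* Simplified sketch of cond_resched_lock(lock): hugetlb_lock may be
     * released and retaken, so other CPUs can enter hugetlb paths that
     * consult resv_huge_pages in the meantime. */
    if (need_resched() || spin_needbreak(lock)) {
            spin_unlock(lock);
            cond_resched();         /* another task may claim huge pages here */
            spin_lock(lock);
    }
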
@@ -2170,6 +2190,10 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
                 * and reducing the surplus.
                 */
                spin_unlock(&hugetlb_lock);
+
+               /* yield cpu to avoid soft lockup */
+               cond_resched();
+
                if (hstate_is_gigantic(h))
                        ret = alloc_fresh_gigantic_page(h, nodes_allowed);
                else
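
For context, this cond_resched() sits in a loop with roughly the following
shape (simplified from set_max_huge_pages() in this file). Without the yield,
writing a very large value to nr_hugepages can keep one CPU allocating huge
pages long enough to trip the soft-lockup watchdog:

    /* Simplified shape of the allocation loop (illustrative only). */
    while (count > persistent_huge_pages(h)) {
            spin_unlock(&hugetlb_lock);

            cond_resched();         /* give other tasks a chance to run */

            if (hstate_is_gigantic(h))
                    ret = alloc_fresh_gigantic_page(h, nodes_allowed);
            else
                    ret = alloc_fresh_huge_page(h, nodes_allowed);
            spin_lock(&hugetlb_lock);
            if (!ret)
                    break;
    }
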
@@ -4209,7 +4233,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
                if (saddr) {
                        spte = huge_pte_offset(svma->vm_mm, saddr);
                        if (spte) {
-                               mm_inc_nr_pmds(mm);
                                get_page(virt_to_page(spte));
                                break;
                        }
@@ -4224,9 +4247,9 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
        if (pud_none(*pud)) {
                pud_populate(mm, pud,
                                (pmd_t *)((unsigned long)spte & PAGE_MASK));
+               mm_inc_nr_pmds(mm);
        } else {
                put_page(virt_to_page(spte));
-               mm_inc_nr_pmds(mm);
        }
        spin_unlock(ptl);
 out:
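
Moving mm_inc_nr_pmds() ties the counter to the only case in which this mm
actually ends up referencing the shared pmd page. The matching decrement lives
in huge_pmd_unshare(), which this patch leaves untouched; roughly:

    /* Sketch of the counterpart in huge_pmd_unshare(): nr_pmds is only
     * dropped when this mm stops using the shared pmd page, so the
     * increment above must only happen when pud_populate() installs it. */
    pud_clear(pud);
    put_page(virt_to_page(ptep));
    mm_dec_nr_pmds(mm);
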