These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
[kvmfornfv.git] kernel/arch/x86/mm/pageattr.c
index 89af288..b599a78 100644
@@ -4,7 +4,6 @@
  */
 #include <linux/highmem.h>
 #include <linux/bootmem.h>
-#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -14,6 +13,7 @@
 #include <linux/percpu.h>
 #include <linux/gfp.h>
 #include <linux/pci.h>
+#include <linux/vmalloc.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -33,7 +33,7 @@ struct cpa_data {
        pgd_t           *pgd;
        pgprot_t        mask_set;
        pgprot_t        mask_clr;
-       int             numpages;
+       unsigned long   numpages;
        int             flags;
        unsigned long   pfn;
        unsigned        force_split : 1;
@@ -129,16 +129,15 @@ within(unsigned long addr, unsigned long start, unsigned long end)
  */
 void clflush_cache_range(void *vaddr, unsigned int size)
 {
-       void *vend = vaddr + size - 1;
+       unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
+       void *vend = vaddr + size;
+       void *p;
 
        mb();
 
-       for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
-               clflushopt(vaddr);
-       /*
-        * Flush any possible final partial cacheline:
-        */
-       clflushopt(vend);
+       for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+            p < vend; p += boot_cpu_data.x86_clflush_size)
+               clflushopt(p);
 
        mb();
 }
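
Note: the loop above now rounds vaddr down to a cacheline boundary and walks up
to vend (exclusive), so the final, possibly partial, line is flushed exactly
once; the old code could issue a second clflushopt on the last line. A minimal
standalone sketch of the rounding arithmetic (illustrative values; the kernel
reads the line size from boot_cpu_data.x86_clflush_size):

#include <stdio.h>
#include <stdint.h>

#define CLFLUSH_SIZE 64UL			/* stand-in for x86_clflush_size */

int main(void)
{
	uintptr_t vaddr = 0x1007, size = 130;	/* unaligned start, odd length */
	uintptr_t mask = CLFLUSH_SIZE - 1;
	uintptr_t vend = vaddr + size;
	uintptr_t p;

	/* round down once; every line in [vaddr, vend) is visited exactly once */
	for (p = vaddr & ~mask; p < vend; p += CLFLUSH_SIZE)
		printf("flush line at %#lx\n", (unsigned long)p);
	return 0;
}
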
@@ -418,17 +417,31 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr)
        phys_addr_t phys_addr;
        unsigned long offset;
        enum pg_level level;
-       unsigned long psize;
-       unsigned long pmask;
        pte_t *pte;
 
        pte = lookup_address(virt_addr, &level);
        BUG_ON(!pte);
-       psize = page_level_size(level);
-       pmask = page_level_mask(level);
-       offset = virt_addr & ~pmask;
-       phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
-       return (phys_addr | offset);
+
+       /*
+        * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
+        * before being left-shifted PAGE_SHIFT bits -- this trick makes
+        * 32-bit PAE kernels work correctly.
+        */
+       switch (level) {
+       case PG_LEVEL_1G:
+               phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
+               offset = virt_addr & ~PUD_PAGE_MASK;
+               break;
+       case PG_LEVEL_2M:
+               phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
+               offset = virt_addr & ~PMD_PAGE_MASK;
+               break;
+       default:
+               phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
+               offset = virt_addr & ~PAGE_MASK;
+       }
+
+       return (phys_addr_t)(phys_addr | offset);
 }
 EXPORT_SYMBOL_GPL(slow_virt_to_phys);
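
Note: the explicit casts matter on 32-bit PAE kernels, where pXX_pfn() returns
a 32-bit unsigned long. Shifting left by PAGE_SHIFT before widening to the
64-bit phys_addr_t would silently drop the high bits of any physical address
above 4 GB. A standalone sketch of the failure mode (hypothetical pfn; the
uint32_t cast emulates the 32-bit shift):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned long pfn = 0x140000;	/* a frame just above 4 GB, reachable with PAE */
	uint32_t bad  = (uint32_t)(pfn << PAGE_SHIFT);	/* what a 32-bit shift yields */
	uint64_t good = (uint64_t)pfn << PAGE_SHIFT;	/* widen first, then shift */

	printf("truncated=%#x correct=%#llx\n", bad, (unsigned long long)good);
	return 0;
}
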
 
@@ -461,7 +474,7 @@ static int
 try_preserve_large_page(pte_t *kpte, unsigned long address,
                        struct cpa_data *cpa)
 {
-       unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
+       unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn;
        pte_t new_pte, old_pte, *tmp;
        pgprot_t old_prot, new_prot, req_prot;
        int i, do_split = 1;
@@ -481,17 +494,21 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 
        switch (level) {
        case PG_LEVEL_2M:
-#ifdef CONFIG_X86_64
+               old_prot = pmd_pgprot(*(pmd_t *)kpte);
+               old_pfn = pmd_pfn(*(pmd_t *)kpte);
+               break;
        case PG_LEVEL_1G:
-#endif
-               psize = page_level_size(level);
-               pmask = page_level_mask(level);
+               old_prot = pud_pgprot(*(pud_t *)kpte);
+               old_pfn = pud_pfn(*(pud_t *)kpte);
                break;
        default:
                do_split = -EINVAL;
                goto out_unlock;
        }
 
+       psize = page_level_size(level);
+       pmask = page_level_mask(level);
+
        /*
         * Calculate the number of pages, which fit into this large
         * page starting at address:
@@ -507,7 +524,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
         * up accordingly.
         */
        old_pte = *kpte;
-       old_prot = req_prot = pgprot_large_2_4k(pte_pgprot(old_pte));
+       req_prot = pgprot_large_2_4k(old_prot);
 
        pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
        pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
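
Note: the switch above reads the protections through pmd_pgprot()/pud_pgprot()
because large-page entries encode the PAT bit at bit 12, while 4K PTEs encode
it at bit 7 (which is PSE in large entries); pgprot_large_2_4k() then relocates
the bit before the 4K-style masks are applied. A rough standalone sketch of
that relocation (bit positions only; the real kernel macros handle the full
pgprot):

#include <stdio.h>

#define _PAGE_BIT_PAT		7	/* PAT in a 4K PTE */
#define _PAGE_BIT_PAT_LARGE	12	/* PAT in a 2M/1G entry; bit 7 is PSE there */

static unsigned long large_2_4k(unsigned long prot)
{
	/* move PAT from the large-page slot down to the 4K slot */
	if (prot & (1UL << _PAGE_BIT_PAT_LARGE)) {
		prot &= ~(1UL << _PAGE_BIT_PAT_LARGE);
		prot |= 1UL << _PAGE_BIT_PAT;
	}
	return prot;
}

int main(void)
{
	printf("%#lx -> %#lx\n", 1UL << _PAGE_BIT_PAT_LARGE,
	       large_2_4k(1UL << _PAGE_BIT_PAT_LARGE));
	return 0;
}
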
@@ -533,10 +550,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
        req_prot = canon_pgprot(req_prot);
 
        /*
-        * old_pte points to the large page base address. So we need
+        * old_pfn is the base pfn of the large page. So we need
         * to add the offset of the virtual address:
         */
-       pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
+       pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);
        cpa->pfn = pfn;
 
        new_prot = static_protections(req_prot, address, pfn);
@@ -547,7 +564,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
         * the pages in the range we try to preserve:
         */
        addr = address & pmask;
-       pfn = pte_pfn(old_pte);
+       pfn = old_pfn;
        for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
                pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
 
@@ -577,7 +594,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
                 * The address is aligned and the number of pages
                 * covers the full page.
                 */
-               new_pte = pfn_pte(pte_pfn(old_pte), new_prot);
+               new_pte = pfn_pte(old_pfn, new_prot);
                __set_pmd_pte(kpte, address, new_pte);
                cpa->flags |= CPA_FLUSHTLB;
                do_split = 0;
@@ -594,7 +611,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
                   struct page *base)
 {
        pte_t *pbase = (pte_t *)page_address(base);
-       unsigned long pfn, pfninc = 1;
+       unsigned long ref_pfn, pfn, pfninc = 1;
        unsigned int i, level;
        pte_t *tmp;
        pgprot_t ref_prot;
@@ -611,26 +628,33 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
        }
 
        paravirt_alloc_pte(&init_mm, page_to_pfn(base));
-       ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 
-       /* promote PAT bit to correct position */
-       if (level == PG_LEVEL_2M)
+       switch (level) {
+       case PG_LEVEL_2M:
+               ref_prot = pmd_pgprot(*(pmd_t *)kpte);
+               /* clear PSE and promote PAT bit to correct position */
                ref_prot = pgprot_large_2_4k(ref_prot);
+               ref_pfn = pmd_pfn(*(pmd_t *)kpte);
+               break;
 
-#ifdef CONFIG_X86_64
-       if (level == PG_LEVEL_1G) {
+       case PG_LEVEL_1G:
+               ref_prot = pud_pgprot(*(pud_t *)kpte);
+               ref_pfn = pud_pfn(*(pud_t *)kpte);
                pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+
                /*
-                * Set the PSE flags only if the PRESENT flag is set
+                * Clear the PSE flags if the PRESENT flag is not set,
                 * otherwise pmd_present/pmd_huge will return true
                 * even on a non present pmd.
                 */
-               if (pgprot_val(ref_prot) & _PAGE_PRESENT)
-                       pgprot_val(ref_prot) |= _PAGE_PSE;
-               else
+               if (!(pgprot_val(ref_prot) & _PAGE_PRESENT))
                        pgprot_val(ref_prot) &= ~_PAGE_PSE;
+               break;
+
+       default:
+               spin_unlock(&pgd_lock);
+               return 1;
        }
-#endif
 
        /*
         * Set the GLOBAL flags only if the PRESENT flag is set
@@ -646,13 +670,16 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
        /*
         * Get the target pfn from the original entry:
         */
-       pfn = pte_pfn(*kpte);
+       pfn = ref_pfn;
        for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
                set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
 
-       if (pfn_range_is_mapped(PFN_DOWN(__pa(address)),
-                               PFN_DOWN(__pa(address)) + 1))
-               split_page_count(level);
+       if (virt_addr_valid(address)) {
+               unsigned long pfn = PFN_DOWN(__pa(address));
+
+               if (pfn_range_is_mapped(pfn, pfn + 1))
+                       split_page_count(level);
+       }
 
        /*
         * Install the new, split up pagetable.
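
Note: in the 1G case above, pfninc steps the pfn by one 2M page per new entry,
because splitting a 1G mapping yields a table of 512 2M entries rather than 4K
PTEs. The stepping in isolation (standalone sketch, x86-64 constants assumed):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PTRS_PER_PTE	512
#define PMD_PAGE_SIZE	(1UL << 21)	/* 2 MB */

int main(void)
{
	unsigned long ref_pfn = 0x100000;			/* base pfn of the 1G page */
	unsigned long pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;	/* 512 pfns per 2M entry */

	/* entry i of the new table maps ref_pfn + i * pfninc */
	printf("pfninc=%lu, entry 511 maps pfn %#lx\n",
	       pfninc, ref_pfn + (PTRS_PER_PTE - 1) * pfninc);
	return 0;
}
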
@@ -1324,7 +1351,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                 * CPA operation. Either a large page has been
                 * preserved or a single page update happened.
                 */
-               BUG_ON(cpa->numpages > numpages);
+               BUG_ON(cpa->numpages > numpages || !cpa->numpages);
                numpages -= cpa->numpages;
                if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
                        cpa->curpage++;
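
Note: the strengthened BUG_ON also fires when cpa->numpages is zero. The
surrounding loop retires cpa->numpages entries per iteration, so a
zero-progress return would spin forever rather than crash cleanly. The loop
shape in miniature (standalone sketch with a hypothetical helper):

#include <assert.h>

static int change_one_range(void)
{
	return 1;	/* hypothetical stand-in: pages retired this pass */
}

static void walk(int numpages)
{
	while (numpages) {
		int done = change_one_range();

		/* done == 0 would mean no progress and an endless loop */
		assert(done > 0 && done <= numpages);
		numpages -= done;
	}
}

int main(void)
{
	walk(4);
	return 0;
}
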
@@ -1468,6 +1495,9 @@ int _set_memory_uc(unsigned long addr, int numpages)
 {
        /*
         * for now UC MINUS. see comments in ioremap_nocache()
+        * If you really need strong UC, use ioremap_uc(), but note
+        * that you cannot override IO areas with set_memory_*() as
+        * these helpers cannot work with IO memory.
         */
        return change_page_attr_set(&addr, numpages,
                                    cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
@@ -1502,12 +1532,10 @@ EXPORT_SYMBOL(set_memory_uc);
 static int _set_memory_array(unsigned long *addr, int addrinarray,
                enum page_cache_mode new_type)
 {
+       enum page_cache_mode set_type;
        int i, j;
        int ret;
 
-       /*
-        * for now UC MINUS. see comments in ioremap_nocache()
-        */
        for (i = 0; i < addrinarray; i++) {
                ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
                                        new_type, NULL);
@@ -1515,9 +1543,12 @@ static int _set_memory_array(unsigned long *addr, int addrinarray,
                        goto out_free;
        }
 
+       /* If WC, set to UC- first and then WC */
+       set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
+                               _PAGE_CACHE_MODE_UC_MINUS : new_type;
+
        ret = change_page_attr_set(addr, addrinarray,
-                                  cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
-                                  1);
+                                  cachemode2pgprot(set_type), 1);
 
        if (!ret && new_type == _PAGE_CACHE_MODE_WC)
                ret = change_page_attr_set_clr(addr, addrinarray,
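
Note: the two-step for WC mirrors _set_memory_wc(): the range is first set to
the conservative UC- and only then upgraded to WC by the follow-up
change_page_attr_set_clr() call; WT and UC- go through in a single pass. The
selection logic in isolation (standalone sketch; enum values are illustrative,
not the kernel's):

#include <stdio.h>

enum pcm { PCM_WB, PCM_WC, PCM_UC_MINUS, PCM_WT };	/* illustrative values */

static enum pcm first_pass(enum pcm new_type)
{
	/* WC goes via UC- first; a second pass then sets the WC bits */
	return new_type == PCM_WC ? PCM_UC_MINUS : new_type;
}

int main(void)
{
	printf("WC -> %d (UC- first)\n", first_pass(PCM_WC));
	printf("WT -> %d (direct)\n", first_pass(PCM_WT));
	return 0;
}
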
@@ -1549,6 +1580,12 @@ int set_memory_array_wc(unsigned long *addr, int addrinarray)
 }
 EXPORT_SYMBOL(set_memory_array_wc);
 
+int set_memory_array_wt(unsigned long *addr, int addrinarray)
+{
+       return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_WT);
+}
+EXPORT_SYMBOL_GPL(set_memory_array_wt);
+
 int _set_memory_wc(unsigned long addr, int numpages)
 {
        int ret;
@@ -1571,27 +1608,42 @@ int set_memory_wc(unsigned long addr, int numpages)
 {
        int ret;
 
-       if (!pat_enabled)
-               return set_memory_uc(addr, numpages);
-
        ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
                _PAGE_CACHE_MODE_WC, NULL);
        if (ret)
-               goto out_err;
+               return ret;
 
        ret = _set_memory_wc(addr, numpages);
        if (ret)
-               goto out_free;
+               free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
 
-       return 0;
-
-out_free:
-       free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
-out_err:
        return ret;
 }
 EXPORT_SYMBOL(set_memory_wc);
 
+int _set_memory_wt(unsigned long addr, int numpages)
+{
+       return change_page_attr_set(&addr, numpages,
+                                   cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0);
+}
+
+int set_memory_wt(unsigned long addr, int numpages)
+{
+       int ret;
+
+       ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+                             _PAGE_CACHE_MODE_WT, NULL);
+       if (ret)
+               return ret;
+
+       ret = _set_memory_wt(addr, numpages);
+       if (ret)
+               free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(set_memory_wt);
+
 int _set_memory_wb(unsigned long addr, int numpages)
 {
        /* WB cache mode is hard wired to all cache attribute bits being 0 */
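
Note: the new set_memory_wt() follows the same reserve/apply/unwind pattern as
the reworked set_memory_wc(): reserve the range as WT in the memtype tree, flip
the page attributes, and drop the reservation on failure. A hedged usage sketch
(hypothetical module, not from this patch; set_memory_wt() is GPL-exported, and
the page must be direct-mapped so __pa() is valid):

#include <linux/module.h>
#include <linux/gfp.h>
#include <asm/cacheflush.h>	/* set_memory_wt()/set_memory_wb() live here in 4.4 */

static unsigned long addr;

static int __init wt_demo_init(void)
{
	int err;

	addr = __get_free_page(GFP_KERNEL);	/* direct-mapped, so __pa() works */
	if (!addr)
		return -ENOMEM;

	err = set_memory_wt(addr, 1);		/* reserve WT memtype, then flip the PTE */
	if (err) {
		free_page(addr);		/* on failure the page simply stays WB */
		return err;
	}
	return 0;
}

static void __exit wt_demo_exit(void)
{
	set_memory_wb(addr, 1);			/* restore WB before freeing the page */
	free_page(addr);
}

module_init(wt_demo_init);
module_exit(wt_demo_exit);
MODULE_LICENSE("GPL");		/* required: set_memory_wt() is EXPORT_SYMBOL_GPL */
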
@@ -1682,6 +1734,7 @@ static int _set_pages_array(struct page **pages, int addrinarray,
 {
        unsigned long start;
        unsigned long end;
+       enum page_cache_mode set_type;
        int i;
        int free_idx;
        int ret;
@@ -1695,8 +1748,12 @@ static int _set_pages_array(struct page **pages, int addrinarray,
                        goto err_out;
        }
 
+       /* If WC, set to UC- first and then WC */
+       set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
+                               _PAGE_CACHE_MODE_UC_MINUS : new_type;
+
        ret = cpa_set_pages_array(pages, addrinarray,
-                       cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS));
+                                 cachemode2pgprot(set_type));
        if (!ret && new_type == _PAGE_CACHE_MODE_WC)
                ret = change_page_attr_set_clr(NULL, addrinarray,
                                               cachemode2pgprot(
@@ -1730,6 +1787,12 @@ int set_pages_array_wc(struct page **pages, int addrinarray)
 }
 EXPORT_SYMBOL(set_pages_array_wc);
 
+int set_pages_array_wt(struct page **pages, int addrinarray)
+{
+       return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_WT);
+}
+EXPORT_SYMBOL_GPL(set_pages_array_wt);
+
 int set_pages_wb(struct page *page, int numpages)
 {
        unsigned long addr = (unsigned long)page_address(page);