#include "pat_internal.h"
#include "mm_internal.h"
-#ifdef CONFIG_X86_PAT
-int __read_mostly pat_enabled = 1;
+#undef pr_fmt
+#define pr_fmt(fmt) "" fmt
+
+static bool boot_cpu_done;
-static inline void pat_disable(const char *reason)
+static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
+static void init_cache_modes(void);
+
+void pat_disable(const char *reason)
{
- pat_enabled = 0;
- printk(KERN_INFO "%s\n", reason);
+ if (!__pat_enabled)
+ return;
+
+ if (boot_cpu_done) {
+ WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
+ return;
+ }
+
+ __pat_enabled = 0;
+ pr_info("x86/PAT: %s\n", reason);
+
+ init_cache_modes();
}
static int __init nopat(char *str)
return 0;
}
early_param("nopat", nopat);
-#else
-static inline void pat_disable(const char *reason)
+
+bool pat_enabled(void)
{
- (void)reason;
+ return !!__pat_enabled;
}
-#endif
-
+EXPORT_SYMBOL_GPL(pat_enabled);
int pat_debug_enable;
}
__setup("debugpat", pat_debug_setup);
-static u64 __read_mostly boot_pat_state;
-
#ifdef CONFIG_X86_PAT
/*
- * X86 PAT uses page flags WC and Uncached together to keep track of
- * memory type of pages that have backing page struct. X86 PAT supports 3
- * different memory types, _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC and
- * _PAGE_CACHE_MODE_UC_MINUS and fourth state where page's memory type has not
- * been changed from its default (value of -1 used to denote this).
- * Note we do not support _PAGE_CACHE_MODE_UC here.
+ * X86 PAT uses page flags arch_1 and uncached together to keep track of
+ * memory type of pages that have backing page struct.
+ *
+ * X86 PAT supports 4 different memory types:
+ * - _PAGE_CACHE_MODE_WB
+ * - _PAGE_CACHE_MODE_WC
+ * - _PAGE_CACHE_MODE_UC_MINUS
+ * - _PAGE_CACHE_MODE_WT
+ *
+ * _PAGE_CACHE_MODE_WB is the default type.
*/
-#define _PGMT_DEFAULT 0
+#define _PGMT_WB 0
#define _PGMT_WC (1UL << PG_arch_1)
#define _PGMT_UC_MINUS (1UL << PG_uncached)
-#define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1)
+#define _PGMT_WT (1UL << PG_uncached | 1UL << PG_arch_1)
#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1)
#define _PGMT_CLEAR_MASK (~_PGMT_MASK)
{
unsigned long pg_flags = pg->flags & _PGMT_MASK;
- if (pg_flags == _PGMT_DEFAULT)
- return -1;
+ if (pg_flags == _PGMT_WB)
+ return _PAGE_CACHE_MODE_WB;
else if (pg_flags == _PGMT_WC)
return _PAGE_CACHE_MODE_WC;
else if (pg_flags == _PGMT_UC_MINUS)
return _PAGE_CACHE_MODE_UC_MINUS;
else
- return _PAGE_CACHE_MODE_WB;
+ return _PAGE_CACHE_MODE_WT;
}
static inline void set_page_memtype(struct page *pg,
case _PAGE_CACHE_MODE_UC_MINUS:
memtype_flags = _PGMT_UC_MINUS;
break;
- case _PAGE_CACHE_MODE_WB:
- memtype_flags = _PGMT_WB;
+ case _PAGE_CACHE_MODE_WT:
+ memtype_flags = _PGMT_WT;
break;
+ case _PAGE_CACHE_MODE_WB:
default:
- memtype_flags = _PGMT_DEFAULT;
+ memtype_flags = _PGMT_WB;
break;
}
* configuration.
* Using lower indices is preferred, so we start with highest index.
*/
-void pat_init_cache_modes(void)
+static void __init_cache_modes(u64 pat)
{
- int i;
enum page_cache_mode cache;
char pat_msg[33];
- u64 pat;
+ int i;
- rdmsrl(MSR_IA32_CR_PAT, pat);
pat_msg[32] = 0;
for (i = 7; i >= 0; i--) {
cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
pat_msg + 4 * i);
update_cache_mode_entry(i, cache);
}
- pr_info("PAT configuration [0-7]: %s\n", pat_msg);
+ pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
}
#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
-void pat_init(void)
+static void pat_bsp_init(u64 pat)
{
- u64 pat;
- bool boot_cpu = !boot_pat_state;
+ u64 tmp_pat;
- if (!pat_enabled)
+ if (!boot_cpu_has(X86_FEATURE_PAT)) {
+ pat_disable("PAT not supported by CPU.");
return;
+ }
- if (!cpu_has_pat) {
- if (!boot_pat_state) {
- pat_disable("PAT not supported by CPU.");
- return;
- } else {
- /*
- * If this happens we are on a secondary CPU, but
- * switched to PAT on the boot CPU. We have no way to
- * undo PAT.
- */
- printk(KERN_ERR "PAT enabled, "
- "but not supported by secondary CPU\n");
- BUG();
- }
+ rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
+ if (!tmp_pat) {
+ pat_disable("PAT MSR is 0, disabled.");
+ return;
}
- /* Set PWT to Write-Combining. All other bits stay the same */
- /*
- * PTE encoding used in Linux:
- * PAT
- * |PCD
- * ||PWT
- * |||
- * 000 WB _PAGE_CACHE_WB
- * 001 WC _PAGE_CACHE_WC
- * 010 UC- _PAGE_CACHE_UC_MINUS
- * 011 UC _PAGE_CACHE_UC
- * PAT bit unused
- */
- pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
- PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
-
- /* Boot CPU check */
- if (!boot_pat_state) {
- rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
- if (!boot_pat_state) {
- pat_disable("PAT read returns always zero, disabled.");
- return;
- }
+ wrmsrl(MSR_IA32_CR_PAT, pat);
+
+ __init_cache_modes(pat);
+}
+
+static void pat_ap_init(u64 pat)
+{
+ if (!boot_cpu_has(X86_FEATURE_PAT)) {
+ /*
+ * If this happens we are on a secondary CPU, but switched to
+ * PAT on the boot CPU. We have no way to undo PAT.
+ */
+ panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n");
}
wrmsrl(MSR_IA32_CR_PAT, pat);
+}
+
+static void init_cache_modes(void)
+{
+ u64 pat = 0;
+ static int init_cm_done;
+
+ if (init_cm_done)
+ return;
+
+ if (boot_cpu_has(X86_FEATURE_PAT)) {
+ /*
+ * CPU supports PAT. Set PAT table to be consistent with
+ * PAT MSR. This case supports "nopat" boot option, and
+ * virtual machine environments which support PAT without
+ * MTRRs. In specific, Xen has unique setup to PAT MSR.
+ *
+ * If PAT MSR returns 0, it is considered invalid and emulates
+ * as No PAT.
+ */
+ rdmsrl(MSR_IA32_CR_PAT, pat);
+ }
+
+ if (!pat) {
+ /*
+ * No PAT. Emulate the PAT table that corresponds to the two
+ * cache bits, PWT (Write Through) and PCD (Cache Disable).
+ * This setup is also the same as the BIOS default setup.
+ *
+ * PTE encoding:
+ *
+ * PCD
+ * |PWT PAT
+ * || slot
+ * 00 0 WB : _PAGE_CACHE_MODE_WB
+ * 01 1 WT : _PAGE_CACHE_MODE_WT
+ * 10 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
+ * 11 3 UC : _PAGE_CACHE_MODE_UC
+ *
+ * NOTE: When WC or WP is used, it is redirected to UC- per
+ * the default setup in __cachemode2pte_tbl[].
+ */
+ pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
+ PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+ }
+
+ __init_cache_modes(pat);
+
+ init_cm_done = 1;
+}
+
+/**
+ * pat_init - Initialize PAT MSR and PAT table
+ *
+ * This function initializes PAT MSR and PAT table with an OS-defined value
+ * to enable additional cache attributes, WC and WT.
+ *
+ * This function must be called on all CPUs using the specific sequence of
+ * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
+ * procedure for PAT.
+ */
+void pat_init(void)
+{
+ u64 pat;
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ if (!pat_enabled()) {
+ init_cache_modes();
+ return;
+ }
- if (boot_cpu)
- pat_init_cache_modes();
+ if ((c->x86_vendor == X86_VENDOR_INTEL) &&
+ (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
+ ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+ /*
+ * PAT support with the lower four entries. Intel Pentium 2,
+ * 3, M, and 4 are affected by PAT errata, which makes the
+ * upper four entries unusable. To be on the safe side, we don't
+ * use those.
+ *
+ * PTE encoding:
+ * PAT
+ * |PCD
+ * ||PWT PAT
+ * ||| slot
+ * 000 0 WB : _PAGE_CACHE_MODE_WB
+ * 001 1 WC : _PAGE_CACHE_MODE_WC
+ * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
+ * 011 3 UC : _PAGE_CACHE_MODE_UC
+ * PAT bit unused
+ *
+ * NOTE: When WT or WP is used, it is redirected to UC- per
+ * the default setup in __cachemode2pte_tbl[].
+ */
+ pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
+ PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
+ } else {
+ /*
+ * Full PAT support. We put WT in slot 7 to improve
+ * robustness in the presence of errata that might cause
+ * the high PAT bit to be ignored. This way, a buggy slot 7
+ * access will hit slot 3, and slot 3 is UC, so at worst
+ * we lose performance without causing a correctness issue.
+ * Pentium 4 erratum N46 is an example for such an erratum,
+ * although we try not to use PAT at all on affected CPUs.
+ *
+ * PTE encoding:
+ * PAT
+ * |PCD
+ * ||PWT PAT
+ * ||| slot
+ * 000 0 WB : _PAGE_CACHE_MODE_WB
+ * 001 1 WC : _PAGE_CACHE_MODE_WC
+ * 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
+ * 011 3 UC : _PAGE_CACHE_MODE_UC
+ * 100 4 WB : Reserved
+ * 101 5 WC : Reserved
+ * 110 6 UC-: Reserved
+ * 111 7 WT : _PAGE_CACHE_MODE_WT
+ *
+ * The reserved slots are unused, but mapped to their
+ * corresponding types in the presence of PAT errata.
+ */
+ pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
+ PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, WT);
+ }
+
+ if (!boot_cpu_done) {
+ pat_bsp_init(pat);
+ boot_cpu_done = true;
+ } else {
+ pat_ap_init(pat);
+ }
}
#undef PAT
* request is for WB.
*/
if (req_type == _PAGE_CACHE_MODE_WB) {
- u8 mtrr_type;
+ u8 mtrr_type, uniform;
- mtrr_type = mtrr_type_lookup(start, end);
+ mtrr_type = mtrr_type_lookup(start, end, &uniform);
if (mtrr_type != MTRR_TYPE_WRBACK)
return _PAGE_CACHE_MODE_UC_MINUS;
/*
* For RAM pages, we use page flags to mark the pages with appropriate type.
- * Here we do two pass:
- * - Find the memtype of all the pages in the range, look for any conflicts
- * - In case of no conflicts, set the new memtype for pages in the range
+ * The page flags are limited to four types, WB (default), WC, WT and UC-.
+ * WP request fails with -EINVAL, and UC gets redirected to UC-. Setting
+ * a new memory type is only allowed for a page mapped with the default WB
+ * type.
+ *
+ * Here we do two passes:
+ * - Find the memtype of all the pages in the range, look for any conflicts.
+ * - In case of no conflicts, set the new memtype for pages in the range.
*/
static int reserve_ram_pages_type(u64 start, u64 end,
enum page_cache_mode req_type,
struct page *page;
u64 pfn;
+ if (req_type == _PAGE_CACHE_MODE_WP) {
+ if (new_type)
+ *new_type = _PAGE_CACHE_MODE_UC_MINUS;
+ return -EINVAL;
+ }
+
if (req_type == _PAGE_CACHE_MODE_UC) {
/* We do not support strong UC */
WARN_ON_ONCE(1);
page = pfn_to_page(pfn);
type = get_page_memtype(page);
- if (type != -1) {
- pr_info("reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
+ if (type != _PAGE_CACHE_MODE_WB) {
+ pr_info("x86/PAT: reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
start, end - 1, type, req_type);
if (new_type)
*new_type = type;
for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
page = pfn_to_page(pfn);
- set_page_memtype(page, -1);
+ set_page_memtype(page, _PAGE_CACHE_MODE_WB);
}
return 0;
}
* - _PAGE_CACHE_MODE_WC
* - _PAGE_CACHE_MODE_UC_MINUS
* - _PAGE_CACHE_MODE_UC
+ * - _PAGE_CACHE_MODE_WT
*
* If new_type is NULL, function will return an error if it cannot reserve the
* region with req_type. If new_type is non-NULL, function will return
BUG_ON(start >= end); /* end is exclusive */
- if (!pat_enabled) {
+ if (!pat_enabled()) {
/* This is identical to page table setting without PAT */
- if (new_type) {
- if (req_type == _PAGE_CACHE_MODE_WC)
- *new_type = _PAGE_CACHE_MODE_UC_MINUS;
- else
- *new_type = req_type;
- }
+ if (new_type)
+ *new_type = req_type;
return 0;
}
err = rbt_memtype_check_insert(new, new_type);
if (err) {
- printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
- start, end - 1,
- cattr_name(new->type), cattr_name(req_type));
+ pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
+ start, end - 1,
+ cattr_name(new->type), cattr_name(req_type));
kfree(new);
spin_unlock(&memtype_lock);
int is_range_ram;
struct memtype *entry;
- if (!pat_enabled)
+ if (!pat_enabled())
return 0;
/* Low ISA region is always mapped WB. No need to track */
spin_unlock(&memtype_lock);
if (!entry) {
- printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
- current->comm, current->pid, start, end - 1);
+ pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
+ current->comm, current->pid, start, end - 1);
return -EINVAL;
}
* Only to be called when PAT is enabled
*
* Returns _PAGE_CACHE_MODE_WB, _PAGE_CACHE_MODE_WC, _PAGE_CACHE_MODE_UC_MINUS
- * or _PAGE_CACHE_MODE_UC
+ * or _PAGE_CACHE_MODE_WT.
*/
static enum page_cache_mode lookup_memtype(u64 paddr)
{
if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
struct page *page;
- page = pfn_to_page(paddr >> PAGE_SHIFT);
- rettype = get_page_memtype(page);
- /*
- * -1 from get_page_memtype() implies RAM page is in its
- * default state and not reserved, and hence of type WB
- */
- if (rettype == -1)
- rettype = _PAGE_CACHE_MODE_WB;
- return rettype;
+ page = pfn_to_page(paddr >> PAGE_SHIFT);
+ return get_page_memtype(page);
}
spin_lock(&memtype_lock);
u64 to = from + size;
u64 cursor = from;
- if (!pat_enabled)
+ if (!pat_enabled())
return 1;
while (cursor < to) {
if (!devmem_is_allowed(pfn)) {
- printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
- current->comm, from, to - 1);
+ pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
+ current->comm, from, to - 1);
return 0;
}
cursor += PAGE_SIZE;
if (file->f_flags & O_DSYNC)
pcm = _PAGE_CACHE_MODE_UC_MINUS;
-#ifdef CONFIG_X86_32
- /*
- * On the PPro and successors, the MTRRs are used to set
- * memory types for physical addresses outside main memory,
- * so blindly setting UC or PWT on those pages is wrong.
- * For Pentiums and earlier, the surround logic should disable
- * caching for the high addresses through the KEN pin, but
- * we maintain the tradition of paranoia in this code.
- */
- if (!pat_enabled &&
- !(boot_cpu_has(X86_FEATURE_MTRR) ||
- boot_cpu_has(X86_FEATURE_K6_MTRR) ||
- boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
- boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
- (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
- pcm = _PAGE_CACHE_MODE_UC;
- }
-#endif
-
*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
cachemode2protval(pcm));
return 1;
size;
if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
- printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
- "for [mem %#010Lx-%#010Lx]\n",
+ pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid,
cattr_name(pcm),
base, (unsigned long long)(base + size-1));
* the type requested matches the type of first page in the range.
*/
if (is_ram) {
- if (!pat_enabled)
+ if (!pat_enabled())
return 0;
pcm = lookup_memtype(paddr);
if (want_pcm != pcm) {
- printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
+ pr_warn("x86/PAT: %s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_pcm),
(unsigned long long)paddr,
if (strict_prot ||
!is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
free_memtype(paddr, paddr + size);
- printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
- " for [mem %#010Lx-%#010Lx], got %s\n",
- current->comm, current->pid,
- cattr_name(want_pcm),
- (unsigned long long)paddr,
- (unsigned long long)(paddr + size - 1),
- cattr_name(pcm));
+ pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
+ current->comm, current->pid,
+ cattr_name(want_pcm),
+ (unsigned long long)paddr,
+ (unsigned long long)(paddr + size - 1),
+ cattr_name(pcm));
return -EINVAL;
}
/*
return ret;
}
- if (!pat_enabled)
+ if (!pat_enabled())
return 0;
/*
{
enum page_cache_mode pcm;
- if (!pat_enabled)
+ if (!pat_enabled())
return 0;
/* Set prot based on lookup */
pgprot_t pgprot_writecombine(pgprot_t prot)
{
- if (pat_enabled)
- return __pgprot(pgprot_val(prot) |
+ return __pgprot(pgprot_val(prot) |
cachemode2protval(_PAGE_CACHE_MODE_WC));
- else
- return pgprot_noncached(prot);
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);
+pgprot_t pgprot_writethrough(pgprot_t prot)
+{
+ return __pgprot(pgprot_val(prot) |
+ cachemode2protval(_PAGE_CACHE_MODE_WT));
+}
+EXPORT_SYMBOL_GPL(pgprot_writethrough);
+
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
static struct memtype *memtype_get_idx(loff_t pos)
static int __init pat_memtype_list_init(void)
{
- if (pat_enabled) {
+ if (pat_enabled()) {
debugfs_create_file("pat_memtype_list", S_IRUSR,
arch_debugfs_dir, NULL, &memtype_fops);
}