These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
[kvmfornfv.git] kernel/drivers/iommu/arm-smmu.c
index 65075ef..47dc7a7 100644
@@ -37,6 +37,7 @@
 #include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
                ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
                        ? 0x400 : 0))
 
+#ifdef CONFIG_64BIT
+#define smmu_writeq    writeq_relaxed
+#else
+#define smmu_writeq(reg64, addr)                               \
+       do {                                                    \
+               u64 __val = (reg64);                            \
+               void __iomem *__addr = (addr);                  \
+               writel_relaxed(__val >> 32, __addr + 4);        \
+               writel_relaxed(__val, __addr);                  \
+       } while (0)
+#endif
+
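
On 32-bit kernels writeq_relaxed() does not exist, so the fallback above
splits each 64-bit register update into two 32-bit writes, upper half
first and low word last. A purely illustrative expansion with a made-up
value:

	/* Illustration only: smmu_writeq(0x0000000100002000ULL, addr)
	 * on a 32-bit kernel becomes: */
	writel_relaxed(0x00000001, addr + 4);	/* __val >> 32 */
	writel_relaxed(0x00002000, addr);	/* low 32 bits */
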
 /* Configuration registers */
 #define ARM_SMMU_GR0_sCR0              0x0
 #define sCR0_CLIENTPD                  (1 << 0)
 #define ARM_SMMU_CB_SCTLR              0x0
 #define ARM_SMMU_CB_RESUME             0x8
 #define ARM_SMMU_CB_TTBCR2             0x10
-#define ARM_SMMU_CB_TTBR0_LO           0x20
-#define ARM_SMMU_CB_TTBR0_HI           0x24
-#define ARM_SMMU_CB_TTBR1_LO           0x28
-#define ARM_SMMU_CB_TTBR1_HI           0x2c
+#define ARM_SMMU_CB_TTBR0              0x20
+#define ARM_SMMU_CB_TTBR1              0x28
 #define ARM_SMMU_CB_TTBCR              0x30
 #define ARM_SMMU_CB_S1_MAIR0           0x38
 #define ARM_SMMU_CB_S1_MAIR1           0x3c
 #define ARM_SMMU_CB_S1_TLBIVAL         0x620
 #define ARM_SMMU_CB_S2_TLBIIPAS2       0x630
 #define ARM_SMMU_CB_S2_TLBIIPAS2L      0x638
-#define ARM_SMMU_CB_ATS1PR_LO          0x800
-#define ARM_SMMU_CB_ATS1PR_HI          0x804
+#define ARM_SMMU_CB_ATS1PR             0x800
 #define ARM_SMMU_CB_ATSR               0x8f0
 
 #define SCTLR_S1_ASIDPNE               (1 << 12)
 #define TTBCR2_SEP_SHIFT               15
 #define TTBCR2_SEP_UPSTREAM            (0x7 << TTBCR2_SEP_SHIFT)
 
-#define TTBRn_HI_ASID_SHIFT            16
+#define TTBRn_ASID_SHIFT               48
 
 #define FSR_MULTI                      (1 << 31)
 #define FSR_SS                         (1 << 30)
 #define FSYNR0_WNR                     (1 << 4)
 
 static int force_stage;
-module_param_named(force_stage, force_stage, int, S_IRUGO | S_IWUSR);
+module_param_named(force_stage, force_stage, int, S_IRUGO);
 MODULE_PARM_DESC(force_stage,
        "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
 
@@ -608,34 +618,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
        }
 }
 
-static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie)
-{
-       struct arm_smmu_domain *smmu_domain = cookie;
-       struct arm_smmu_device *smmu = smmu_domain->smmu;
-       unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
-
-
-       /* Ensure new page tables are visible to the hardware walker */
-       if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) {
-               dsb(ishst);
-       } else {
-               /*
-                * If the SMMU can't walk tables in the CPU caches, treat them
-                * like non-coherent DMA since we need to flush the new entries
-                * all the way out to memory. There's no possibility of
-                * recursion here as the SMMU table walker will not be wired
-                * through another SMMU.
-                */
-               dma_map_page(smmu->dev, virt_to_page(addr), offset, size,
-                            DMA_TO_DEVICE);
-       }
-}
-
 static struct iommu_gather_ops arm_smmu_gather_ops = {
        .tlb_flush_all  = arm_smmu_tlb_inv_context,
        .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
        .tlb_sync       = arm_smmu_tlb_sync,
-       .flush_pgtable  = arm_smmu_flush_pgtable,
 };
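
The flushing duty described in the deleted comment does not go away; it
moves into the io-pgtable library, which gets the device pointer it
needs via the iommu_dev field wired up later in this patch. A rough
sketch of the idea, not the exact io-pgtable code:

	/* Sketch only: the page-table code can itself push new entries
	 * out to a non-coherent walker, treating it as a DMA device via
	 * the cfg->iommu_dev handed over by this driver. */
	dma_map_single(cfg->iommu_dev, ptep, size, DMA_TO_DEVICE);
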
 
 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
@@ -719,12 +705,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
                                       struct io_pgtable_cfg *pgtbl_cfg)
 {
        u32 reg;
+       u64 reg64;
        bool stage1;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
-       void __iomem *cb_base, *gr0_base, *gr1_base;
+       void __iomem *cb_base, *gr1_base;
 
-       gr0_base = ARM_SMMU_GR0(smmu);
        gr1_base = ARM_SMMU_GR1(smmu);
        stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
        cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
@@ -762,22 +748,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 
        /* TTBRs */
        if (stage1) {
-               reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
-               reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
-               reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
-
-               reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
-               reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
-               reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
+               reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+
+               reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
+               smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0);
+
+               reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+               reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
+               smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR1);
        } else {
-               reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
-               reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
+               reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
+               smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0);
        }
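
The ASID lands in the same physical bits as before: bit 16 of the old
separate TTBRn_HI word is bit 32 + 16 = 48 of the combined 64-bit
register, which is why the shift constant changed from 16 to 48:

	/* Before: ASID merged into the high word, written on its own */
	reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;	  /* << 16 */
	/* After: same bits, expressed against the full 64-bit value */
	reg64 |= (u64)ARM_SMMU_CB_ASID(cfg) << TTBRn_ASID_SHIFT;  /* << 48 */
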
 
        /* TTBCR */
@@ -899,6 +880,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                .ias            = ias,
                .oas            = oas,
                .tlb            = &arm_smmu_gather_ops,
+               .iommu_dev      = smmu->dev,
        };
 
        smmu_domain->smmu = smmu;
@@ -1229,23 +1211,21 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
        void __iomem *cb_base;
        u32 tmp;
        u64 phys;
+       unsigned long va;
 
        cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
 
-       if (smmu->version == 1) {
-               u32 reg = iova & ~0xfff;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
-       } else {
-               u32 reg = iova & ~0xfff;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
-               reg = ((u64)iova & ~0xfff) >> 32;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);
-       }
+       /* ATS1 registers can only be written atomically */
+       va = iova & ~0xfffUL;
+       if (smmu->version == ARM_SMMU_V2)
+               smmu_writeq(va, cb_base + ARM_SMMU_CB_ATS1PR);
+       else
+               writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
 
        if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
                                      !(tmp & ATSR_ACTIVE), 5, 50)) {
                dev_err(dev,
-                       "iova to phys timed out on 0x%pad. Falling back to software table walk.\n",
+                       "iova to phys timed out on %pad. Falling back to software table walk.\n",
                        &iova);
                return ops->iova_to_phys(ops, iova);
        }
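
readl_poll_timeout_atomic() comes from <linux/iopoll.h> (already
included above): it re-reads ATSR every 5us and returns -ETIMEDOUT
after 50us, at which point the driver falls back to a software table
walk. Roughly, the call expands to:

	/* Simplified expansion of the poll above (bookkeeping trimmed;
	 * see <linux/iopoll.h> for the real macro): */
	ktime_t timeout = ktime_add_us(ktime_get(), 50);

	for (;;) {
		tmp = readl(cb_base + ARM_SMMU_CB_ATSR);
		if (!(tmp & ATSR_ACTIVE))
			break;			/* translation done */
		if (ktime_compare(ktime_get(), timeout) > 0)
			break;			/* gives up: -ETIMEDOUT */
		udelay(5);
	}
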
@@ -1315,33 +1295,25 @@ static void __arm_smmu_release_pci_iommudata(void *data)
        kfree(data);
 }
 
-static int arm_smmu_add_pci_device(struct pci_dev *pdev)
+static int arm_smmu_init_pci_device(struct pci_dev *pdev,
+                                   struct iommu_group *group)
 {
-       int i, ret;
-       u16 sid;
-       struct iommu_group *group;
        struct arm_smmu_master_cfg *cfg;
-
-       group = iommu_group_get_for_dev(&pdev->dev);
-       if (IS_ERR(group))
-               return PTR_ERR(group);
+       u16 sid;
+       int i;
 
        cfg = iommu_group_get_iommudata(group);
        if (!cfg) {
                cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
-               if (!cfg) {
-                       ret = -ENOMEM;
-                       goto out_put_group;
-               }
+               if (!cfg)
+                       return -ENOMEM;
 
                iommu_group_set_iommudata(group, cfg,
                                          __arm_smmu_release_pci_iommudata);
        }
 
-       if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) {
-               ret = -ENOSPC;
-               goto out_put_group;
-       }
+       if (cfg->num_streamids >= MAX_MASTER_STREAMIDS)
+               return -ENOSPC;
 
        /*
         * Assume Stream ID == Requester ID for now.
@@ -1357,16 +1329,13 @@ static int arm_smmu_add_pci_device(struct pci_dev *pdev)
                cfg->streamids[cfg->num_streamids++] = sid;
 
        return 0;
-out_put_group:
-       iommu_group_put(group);
-       return ret;
 }
 
-static int arm_smmu_add_platform_device(struct device *dev)
+static int arm_smmu_init_platform_device(struct device *dev,
+                                        struct iommu_group *group)
 {
-       struct iommu_group *group;
-       struct arm_smmu_master *master;
        struct arm_smmu_device *smmu = find_smmu_for_device(dev);
+       struct arm_smmu_master *master;
 
        if (!smmu)
                return -ENODEV;
@@ -1375,21 +1344,20 @@ static int arm_smmu_add_platform_device(struct device *dev)
        if (!master)
                return -ENODEV;
 
-       /* No automatic group creation for platform devices */
-       group = iommu_group_alloc();
-       if (IS_ERR(group))
-               return PTR_ERR(group);
-
        iommu_group_set_iommudata(group, &master->cfg, NULL);
-       return iommu_group_add_device(group, dev);
+
+       return 0;
 }
 
 static int arm_smmu_add_device(struct device *dev)
 {
-       if (dev_is_pci(dev))
-               return arm_smmu_add_pci_device(to_pci_dev(dev));
+       struct iommu_group *group;
+
+       group = iommu_group_get_for_dev(dev);
+       if (IS_ERR(group))
+               return PTR_ERR(group);
 
-       return arm_smmu_add_platform_device(dev);
+       return 0;
 }
 
 static void arm_smmu_remove_device(struct device *dev)
@@ -1397,6 +1365,32 @@ static void arm_smmu_remove_device(struct device *dev)
        iommu_group_remove_device(dev);
 }
 
+static struct iommu_group *arm_smmu_device_group(struct device *dev)
+{
+       struct iommu_group *group;
+       int ret;
+
+       if (dev_is_pci(dev))
+               group = pci_device_group(dev);
+       else
+               group = generic_device_group(dev);
+
+       if (IS_ERR(group))
+               return group;
+
+       if (dev_is_pci(dev))
+               ret = arm_smmu_init_pci_device(to_pci_dev(dev), group);
+       else
+               ret = arm_smmu_init_platform_device(dev, group);
+
+       if (ret) {
+               iommu_group_put(group);
+               group = ERR_PTR(ret);
+       }
+
+       return group;
+}
+
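
With the device_group callback in place, group creation for both PCI
and platform devices funnels through iommu_group_get_for_dev() in the
IOMMU core, which is exactly what the slimmed-down arm_smmu_add_device()
above now calls. Simplified from drivers/iommu/iommu.c (reference
counting and error paths trimmed):

	struct iommu_group *iommu_group_get_for_dev(struct device *dev)
	{
		const struct iommu_ops *ops = dev->bus->iommu_ops;
		struct iommu_group *group = iommu_group_get(dev);

		if (group)
			return group;		/* already grouped */

		group = ERR_PTR(-EINVAL);
		if (ops && ops->device_group)
			group = ops->device_group(dev); /* -> arm_smmu_device_group() */
		if (IS_ERR(group))
			return group;

		iommu_group_add_device(group, dev);
		return group;
	}
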
 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
                                    enum iommu_attr attr, void *data)
 {
@@ -1453,6 +1447,7 @@ static struct iommu_ops arm_smmu_ops = {
        .iova_to_phys           = arm_smmu_iova_to_phys,
        .add_device             = arm_smmu_add_device,
        .remove_device          = arm_smmu_remove_device,
+       .device_group           = arm_smmu_device_group,
        .domain_get_attr        = arm_smmu_domain_get_attr,
        .domain_set_attr        = arm_smmu_domain_set_attr,
        .pgsize_bitmap          = -1UL, /* Restricted during device attach */
@@ -1533,6 +1528,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
        unsigned long size;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
        u32 id;
+       bool cttw_dt, cttw_reg;
 
        dev_notice(smmu->dev, "probing hardware configuration...\n");
        dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version);
@@ -1572,10 +1568,22 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
                dev_notice(smmu->dev, "\taddress translation ops\n");
        }
 
-       if (id & ID0_CTTW) {
+       /*
+        * In order for DMA API calls to work properly, we must defer to what
+        * the DT says about coherency, regardless of what the hardware claims.
+        * Fortunately, this also opens up a workaround for systems where the
+        * ID register value has ended up configured incorrectly.
+        */
+       cttw_dt = of_dma_is_coherent(smmu->dev->of_node);
+       cttw_reg = !!(id & ID0_CTTW);
+       if (cttw_dt)
                smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
-               dev_notice(smmu->dev, "\tcoherent table walk\n");
-       }
+       if (cttw_dt || cttw_reg)
+               dev_notice(smmu->dev, "\t%scoherent table walk\n",
+                          cttw_dt ? "" : "non-");
+       if (cttw_dt != cttw_reg)
+               dev_notice(smmu->dev,
+                          "\t(IDR0.CTTW overridden by dma-coherent property)\n");
 
        if (id & ID0_SMS) {
                u32 smr, sid, mask;