Upgrade to 4.4.50-rt62
diff --git a/kernel/drivers/iommu/intel-iommu.c b/kernel/drivers/iommu/intel-iommu.c
index a2e1b7f..9413b07 100644
--- a/kernel/drivers/iommu/intel-iommu.c
+++ b/kernel/drivers/iommu/intel-iommu.c
@@ -885,7 +885,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
                return NULL;
 
        if (dev_is_pci(dev)) {
+               struct pci_dev *pf_pdev;
+
                pdev = to_pci_dev(dev);
+               /* VFs aren't listed in scope tables; we need to look up
+                * the PF instead to find the IOMMU. */
+               pf_pdev = pci_physfn(pdev);
+               dev = &pf_pdev->dev;
                segment = pci_domain_nr(pdev->bus);
        } else if (has_acpi_companion(dev))
                dev = &ACPI_COMPANION(dev)->dev;
@@ -898,6 +904,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
                for_each_active_dev_scope(drhd->devices,
                                          drhd->devices_cnt, i, tmp) {
                        if (tmp == dev) {
+                               /* For a VF use its original BDF# not that of the PF
+                                * which we used for the IOMMU lookup. Strictly speaking
+                                * we could do this for all PCI devices; we only need to
+                                * get the BDF# from the scope table for ACPI matches. */
+                               if (pdev->is_virtfn)
+                                       goto got_pdev;
+
                                *bus = drhd->devices[i].bus;
                                *devfn = drhd->devices[i].devfn;
                                goto out;
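
The two hunks above work as a pair: the IOMMU is located by matching the PF in the DRHD scope table, while the VF's own bus/devfn is still returned for context programming. Below is a minimal standalone sketch of that split, using mock types in place of struct pci_dev and the scope table; nothing in it is kernel API.

/*
 * Standalone sketch, not kernel code: mock_pdev stands in for struct
 * pci_dev and scope[] for a DMAR scope table.  Scope tables only list
 * physical functions, so a VF is matched via its parent PF, while the
 * source-id reported for it still uses the VF's own bus/devfn.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct mock_pdev {
        bool is_virtfn;
        const struct mock_pdev *physfn; /* parent PF, valid for VFs only */
        uint8_t bus, devfn;
};

/* Same idea as pci_physfn(): a VF defers to its PF, a PF is its own key. */
static const struct mock_pdev *scope_key(const struct mock_pdev *dev)
{
        return dev->is_virtfn ? dev->physfn : dev;
}

int main(void)
{
        struct mock_pdev pf = { .bus = 0x03, .devfn = 0x00 };
        struct mock_pdev vf = { .is_virtfn = true, .physfn = &pf,
                                .bus = 0x03, .devfn = 0x12 };
        const struct mock_pdev *scope[] = { &pf };      /* only the PF is listed */
        const struct mock_pdev *key = scope_key(&vf);
        unsigned int sid = ((unsigned int)vf.bus << 8) | vf.devfn;
        size_t i;

        for (i = 0; i < sizeof(scope) / sizeof(scope[0]); i++) {
                if (scope[i] != key)
                        continue;
                /* Matched via the PF, but keep the VF's own source-id. */
                printf("iommu found via %02x:%02x.%d, source-id %#06x\n",
                       key->bus, key->devfn >> 3, key->devfn & 7, sid);
        }
        return 0;
}
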
@@ -1672,6 +1685,7 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
        if (!iommu->domains || !iommu->domain_ids)
                return;
 
+again:
        spin_lock_irqsave(&device_domain_lock, flags);
        list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
                struct dmar_domain *domain;
@@ -1684,10 +1698,19 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
 
                domain = info->domain;
 
-               dmar_remove_one_dev_info(domain, info->dev);
+               __dmar_remove_one_dev_info(info);
 
-               if (!domain_type_is_vm_or_si(domain))
+               if (!domain_type_is_vm_or_si(domain)) {
+                       /*
+                        * The domain_exit() function can't be called under
+                        * device_domain_lock, as it takes this lock itself.
+                        * So release the lock here and re-run the loop
+                        * afterwards.
+                        */
+                       spin_unlock_irqrestore(&device_domain_lock, flags);
                        domain_exit(domain);
+                       goto again;
+               }
        }
        spin_unlock_irqrestore(&device_domain_lock, flags);
 
@@ -1970,6 +1993,25 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
        if (context_present(context))
                goto out_unlock;
 
+       /*
+        * For kdump cases, old valid entries may be cached due to the
+        * in-flight DMA and copied pgtable, but there is no unmapping
+        * behaviour for them, thus we need an explicit cache flush for
+        * the newly-mapped device. For kdump, at this point, the device
+        * is supposed to have finished reset at its driver probe stage,
+        * so no in-flight DMA will exist, and we don't need to worry
+        * about it hereafter.
+        */
+       if (context_copied(context)) {
+               u16 did_old = context_domain_id(context);
+
+               if (did_old >= 0 && did_old < cap_ndoms(iommu->cap))
+                       iommu->flush.flush_context(iommu, did_old,
+                                                  (((u16)bus) << 8) | devfn,
+                                                  DMA_CCMD_MASK_NOBIT,
+                                                  DMA_CCMD_DEVICE_INVL);
+       }
+
        pgd = domain->pgd;
 
        context_clear_entry(context);
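
Below is a standalone sketch of the flush decision added above, assuming the legacy VT-d context-entry layout with the domain-id in the high qword; the source-id is the usual (bus << 8) | devfn. It only illustrates why the invalidation is issued with the old domain-id and scoped to the device.

/*
 * Standalone sketch of the flush decision above.  The layout mirrors
 * the legacy VT-d context entry (present bit in the low qword,
 * domain-id in bits 8-23 of the high qword); treat the exact positions
 * as illustrative.  The point is that an entry copied from the crashed
 * kernel still names that kernel's domain-id, so the old id, scoped to
 * the device's source-id, is what the invalidation must carry.
 */
#include <stdint.h>
#include <stdio.h>

struct ctx_entry { uint64_t lo, hi; };

static unsigned int ctx_domain_id(const struct ctx_entry *c)
{
        return (unsigned int)((c->hi >> 8) & 0xffff);
}

static unsigned int source_id(uint8_t bus, uint8_t devfn)
{
        return ((unsigned int)bus << 8) | devfn;
}

int main(void)
{
        /* Entry inherited from the old kernel: present, old domain-id 42. */
        struct ctx_entry copied = { .lo = 0x1, .hi = (uint64_t)42 << 8 };
        unsigned int ndoms = 256;       /* pretend cap_ndoms() reported 256 */
        unsigned int did_old = ctx_domain_id(&copied);

        if (did_old < ndoms)
                printf("flush context cache: did=%u sid=%#06x (device-scoped)\n",
                       did_old, source_id(0x03, 0x10));
        return 0;
}
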
@@ -2032,7 +2074,7 @@ out_unlock:
        spin_unlock(&iommu->lock);
        spin_unlock_irqrestore(&device_domain_lock, flags);
 
-       return 0;
+       return ret;
 }
 
 struct domain_context_mapping_data {
@@ -3169,11 +3211,6 @@ static int __init init_dmars(void)
                        }
                }
 
-               iommu_flush_write_buffer(iommu);
-               iommu_set_root_entry(iommu);
-               iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
-               iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
-
                if (!ecap_pass_through(iommu->ecap))
                        hw_pass_through = 0;
 #ifdef CONFIG_INTEL_IOMMU_SVM
@@ -3182,6 +3219,18 @@ static int __init init_dmars(void)
 #endif
        }
 
+       /*
+        * Now that qi is enabled on all iommus, set the root entry and flush
+        * caches. This is required on some Intel X58 chipsets, otherwise the
+        * flush_context function will loop forever and the boot hangs.
+        */
+       for_each_active_iommu(iommu, drhd) {
+               iommu_flush_write_buffer(iommu);
+               iommu_set_root_entry(iommu);
+               iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
+               iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+       }
+
        if (iommu_pass_through)
                iommu_identity_mapping |= IDENTMAP_ALL;
 
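
A minimal sketch of the reordering above, with placeholder unit and init functions: the first pass enables queued invalidation on every unit, and only then does a second pass set the root entry and issue the global flushes, per the comment about flush_context hanging on some X58 chipsets.

/*
 * Standalone sketch of the two-pass ordering above; every name here is
 * a placeholder.  Pass one brings up queued invalidation on every unit,
 * and only then does pass two set the root entry and issue the global
 * flushes, mirroring the comment about flush_context looping forever
 * when the flush runs before QI is enabled everywhere.
 */
#include <stdio.h>

struct unit { int id; int qi_enabled; };

static void enable_qi(struct unit *u)
{
        u->qi_enabled = 1;
}

static void set_root_and_flush(const struct unit *u)
{
        printf("unit %d: root entry set, context+IOTLB flushed (qi=%d)\n",
               u->id, u->qi_enabled);
}

int main(void)
{
        struct unit units[] = { { .id = 0 }, { .id = 1 } };
        const size_t n = sizeof(units) / sizeof(units[0]);
        size_t i;

        for (i = 0; i < n; i++)         /* pass 1: invalidation interface first */
                enable_qi(&units[i]);
        for (i = 0; i < n; i++)         /* pass 2: now safe to program and flush */
                set_root_and_flush(&units[i]);
        return 0;
}
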
@@ -4175,10 +4224,11 @@ int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
        if (!atsru)
                return 0;
 
-       if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
+       if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
                for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
                                          i, dev)
                        return -EBUSY;
+       }
 
        return 0;
 }
@@ -4989,6 +5039,25 @@ static void intel_iommu_remove_device(struct device *dev)
 }
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
+#define MAX_NR_PASID_BITS (20)
+static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
+{
+       /*
+        * Convert ecap_pss to the extended context entry pts encoding, also
+        * respect the soft pasid_max value set by the iommu.
+        * - number of PASID bits = ecap_pss + 1
+        * - number of PASID table entries = 2^(pts + 5)
+        * Therefore, pts = ecap_pss - 4
+        * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
+        */
+       if (ecap_pss(iommu->ecap) < 5)
+               return 0;
+
+       /* pasid_max is encoded as the actual number of entries, not the bits */
+       return find_first_bit((unsigned long *)&iommu->pasid_max,
+                       MAX_NR_PASID_BITS) - 5;
+}
+
 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
 {
        struct device_domain_info *info;
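
The arithmetic behind intel_iommu_get_pts() is easy to check by hand: with ecap_pss = 0x13 there are 20 PASID bits, so 2^20 table entries and pts = 0x13 - 4 = 15; a pasid_max of 2^20 entries encodes to the same value. A standalone sketch, with a portable log2 helper standing in for find_first_bit(), follows.

/*
 * Standalone sketch of the encoding implemented by intel_iommu_get_pts()
 * above (and applied in the next hunk in place of raw ecap_pss).  The
 * PASID table holds 2^(pts + 5) entries, hardware reports pss + 1
 * usable PASID bits (so 2^(pss + 1) entries), and pasid_max is a count
 * of entries rather than a bit width.  For a power-of-two pasid_max the
 * lowest and highest set bit coincide, so log2_floor() below stands in
 * for the kernel's find_first_bit().
 */
#include <stdio.h>

static unsigned int log2_floor(unsigned long v)
{
        unsigned int bit = 0;

        while (v >>= 1)
                bit++;
        return bit;
}

/* 2^(pts + 5) = 2^(pss + 1)  =>  pts = pss - 4 (0 if pss is too small) */
static unsigned int pts_from_pss(unsigned int pss)
{
        return pss < 5 ? 0 : pss - 4;
}

/* pasid_max = 2^n entries  =>  pts = n - 5 */
static unsigned int pts_from_max(unsigned long pasid_max)
{
        return log2_floor(pasid_max) - 5;
}

int main(void)
{
        /* e.g. Kaby Lake: ecap.pss = 0x13 -> 20 PASID bits -> pts = 15 */
        printf("pts from pss 0x13:          %u\n", pts_from_pss(0x13));
        /* a soft cap of 2^20 table entries encodes to the same value */
        printf("pts from pasid_max 1 << 20: %u\n", pts_from_max(1UL << 20));
        return 0;
}
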
@@ -5021,7 +5090,9 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
 
        if (!(ctx_lo & CONTEXT_PASIDE)) {
                context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
-               context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
+               context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
+                       intel_iommu_get_pts(iommu);
+
                wmb();
                /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
                 * extended to permit requests-with-PASID if the PASIDE bit