From 50d3fb562561fcf5b745e71945834735d7386a1f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 13 Oct 2015 20:48:21 +0100 Subject: iommu/vt-d: Use plain writeq() for dmar_writeq() where available Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 6240063bdcac..08802b99f057 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -59,14 +59,11 @@ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define OFFSET_STRIDE (9) -/* -#define dmar_readl(dmar, reg) readl(dmar + reg) -#define dmar_readq(dmar, reg) ({ \ - u32 lo, hi; \ - lo = readl(dmar + reg); \ - hi = readl(dmar + reg + 4); \ - (((u64) hi) << 32) + lo; }) -*/ + +#ifdef CONFIG_64BIT +#define dmar_readq(a) readq(a) +#define dmar_writeq(a,v) writeq(v,a) +#else static inline u64 dmar_readq(void __iomem *addr) { u32 lo, hi; @@ -80,6 +77,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) writel((u32)val, addr); writel((u32)(val >> 32), addr + 4); } +#endif #define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) #define DMAR_VER_MINOR(v) ((v) & 0x0f) -- cgit v1.2.3 From ae853ddb9ad5e7c01cad3fbf016040acd961f407 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 9 Sep 2015 11:58:59 +0100 Subject: iommu/vt-d: Introduce intel_iommu=pasid28, and pasid_enabled() macro As long as we use an identity mapping to work around the worst of the hardware bugs which caused us to defeature it and change the definition of the capability bit, we *can* use PASID support on the devices which advertised it in bit 28 of the Extended Capability Register. Allow people to do so with 'intel_iommu=pasid28' on the command line. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 20 ++++++++++++++------ include/linux/intel-iommu.h | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 17b4744628ab..6a01735f4df3 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -497,13 +497,21 @@ static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int intel_iommu_ecs = 1; +static int intel_iommu_pasid28; +static int iommu_identity_mapping; + +#define IDENTMAP_ALL 1 +#define IDENTMAP_GFX 2 +#define IDENTMAP_AZALIA 4 /* We only actually use ECS when PASID support (on the new bit 40) * is also advertised. Some early implementations — the ones with * PASID support on bit 28 — have issues even when we *only* use * extended root/context tables. 
*/ +#define pasid_enabled(iommu) (ecap_pasid(iommu->ecap) || \ + (intel_iommu_pasid28 && ecap_broken_pasid(iommu->ecap))) #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ - ecap_pasid(iommu->ecap)) + pasid_enabled(iommu)) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -566,6 +574,11 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable extended context table support\n"); intel_iommu_ecs = 0; + } else if (!strncmp(str, "pasid28", 7)) { + printk(KERN_INFO + "Intel-IOMMU: enable pre-production PASID support\n"); + intel_iommu_pasid28 = 1; + iommu_identity_mapping |= IDENTMAP_GFX; } str += strcspn(str, ","); @@ -2403,11 +2416,6 @@ found_domain: return domain; } -static int iommu_identity_mapping; -#define IDENTMAP_ALL 1 -#define IDENTMAP_GFX 2 -#define IDENTMAP_AZALIA 4 - static int iommu_domain_identity_map(struct dmar_domain *domain, unsigned long long start, unsigned long long end) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 08802b99f057..1d69c1d3aa9a 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -121,7 +121,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_srs(e) ((e >> 31) & 0x1) #define ecap_ers(e) ((e >> 30) & 0x1) #define ecap_prs(e) ((e >> 29) & 0x1) -/* PASID support used to be on bit 28 */ +#define ecap_broken_pasid(e) ((e >> 28) & 0x1) #define ecap_dis(e) ((e >> 27) & 0x1) #define ecap_nest(e) ((e >> 26) & 0x1) #define ecap_mts(e) ((e >> 25) & 0x1) -- cgit v1.2.3 From 8a94ade4ce6df22006b96c5c9a8d6d12fce67585 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 24 Mar 2015 14:54:56 +0000 Subject: iommu/vt-d: Add initial support for PASID tables Add CONFIG_INTEL_IOMMU_SVM, and allocate PASID tables on supported hardware. Signed-off-by: David Woodhouse --- drivers/iommu/Kconfig | 8 ++++++ drivers/iommu/Makefile | 1 + drivers/iommu/intel-iommu.c | 14 ++++++++++ drivers/iommu/intel-svm.c | 65 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/intel-iommu.h | 15 +++++++++++ 5 files changed, 103 insertions(+) create mode 100644 drivers/iommu/intel-svm.c (limited to 'include') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d9da766719c8..e3b2c2e62e32 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -135,6 +135,14 @@ config INTEL_IOMMU and include PCI device scope covered by these DMA remapping devices. +config INTEL_IOMMU_SVM + bool "Support for Shared Virtual Memory with Intel IOMMU" + depends on INTEL_IOMMU && X86 + help + Shared Virtual Memory (SVM) provides a facility for devices + to access DMA resources through process address space by + means of a Process Address Space ID (PASID). 
+ config INTEL_IOMMU_DEFAULT_ON def_bool y prompt "Enable Intel DMA Remapping Devices by default" diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index c6dcc513d711..dc6f511f45a3 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o +obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6a01735f4df3..ab99fcfd9f2d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1680,6 +1680,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu) /* free context mapping */ free_context_table(iommu); + +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_free_pasid_tables(iommu); +#endif } static struct dmar_domain *alloc_domain(int flags) @@ -3107,6 +3112,10 @@ static int __init init_dmars(void) if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_alloc_pasid_tables(iommu); +#endif } if (iommu_pass_through) @@ -4122,6 +4131,11 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (ret) goto out; +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_alloc_pasid_tables(iommu); +#endif + if (dmaru->ignored) { /* * we always have to disable PMRs or DMA may fail on this device diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c new file mode 100644 index 000000000000..5b42a95b3f80 --- /dev/null +++ b/drivers/iommu/intel-svm.c @@ -0,0 +1,65 @@ +/* + * Copyright © 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * Authors: David Woodhouse + */ + +#include + +int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) +{ + struct page *pages; + int order; + + order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; + if (order < 0) + order = 0; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!pages) { + pr_warn("IOMMU: %s: Failed to allocate PASID table\n", + iommu->name); + return -ENOMEM; + } + iommu->pasid_table = page_address(pages); + pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order); + + if (ecap_dis(iommu->ecap)) { + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (pages) + iommu->pasid_state_table = page_address(pages); + else + pr_warn("IOMMU: %s: Failed to allocate PASID state table\n", + iommu->name); + } + + return 0; +} + +int intel_svm_free_pasid_tables(struct intel_iommu *iommu) +{ + int order; + + order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; + if (order < 0) + order = 0; + + if (iommu->pasid_table) { + free_pages((unsigned long)iommu->pasid_table, order); + iommu->pasid_table = NULL; + } + if (iommu->pasid_state_table) { + free_pages((unsigned long)iommu->pasid_state_table, order); + iommu->pasid_state_table = NULL; + } + return 0; +} diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 1d69c1d3aa9a..0f38e60d40ad 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -325,6 +325,9 @@ enum { #define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0) #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) +struct pasid_entry; +struct pasid_state_entry; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 reg_phys; /* physical address of hw register set */ @@ -347,6 +350,15 @@ struct intel_iommu { struct root_entry *root_entry; /* virtual address */ struct iommu_flush flush; +#endif +#ifdef CONFIG_INTEL_IOMMU_SVM + /* These are large and need to be contiguous, so we allocate just + * one for now. We'll maybe want to rethink that if we truly give + * devices away to userspace processes (e.g. for DPDK) and don't + * want to trust that userspace will use *only* the PASID it was + * told to. But while it's all driver-arbitrated, we're fine. */ + struct pasid_entry *pasid_table; + struct pasid_state_entry *pasid_state_table; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ @@ -387,6 +399,9 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); +extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); +extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); + extern const struct attribute_group *intel_iommu_groups[]; #endif -- cgit v1.2.3 From 2f26e0a9c9860db290d63e9d85c2c8c09813677f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 9 Sep 2015 11:40:47 +0100 Subject: iommu/vt-d: Add basic SVM PASID support This provides basic PASID support for endpoint devices, tested with a version of the i915 driver. 
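A rough sketch of how a driver might consume this interface follows; the my_dev_* wrappers are hypothetical, and only the intel_svm_* calls and their semantics come from this patch:

    /* Hypothetical driver usage (sketch only, not part of this patch). */
    int my_dev_enable_svm(struct device *dev, int *pasid)
    {
            int ret;

            /* Bind current->mm; a PASID is returned on success. */
            ret = intel_svm_bind_mm(dev, pasid);
            if (ret)
                    return ret;

            /* ...program the device to issue DMA tagged with *pasid... */
            return 0;
    }

    void my_dev_disable_svm(struct device *dev, int pasid)
    {
            /* No requests for this PASID may still be outstanding here. */
            intel_svm_unbind_mm(dev, pasid);
    }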
Signed-off-by: David Woodhouse --- drivers/iommu/Kconfig | 1 + drivers/iommu/intel-iommu.c | 104 +++++++++++++++ drivers/iommu/intel-svm.c | 291 ++++++++++++++++++++++++++++++++++++++++++ include/linux/dma_remapping.h | 7 + include/linux/intel-iommu.h | 68 +++++++++- include/linux/intel-svm.h | 82 ++++++++++++ 6 files changed, 548 insertions(+), 5 deletions(-) create mode 100644 include/linux/intel-svm.h (limited to 'include') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 8d23f5ed8ae2..be18b214c31e 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -139,6 +139,7 @@ config INTEL_IOMMU_SVM bool "Support for Shared Virtual Memory with Intel IOMMU" depends on INTEL_IOMMU && X86 select PCI_PASID + select MMU_NOTIFIER help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 9995ea84e23a..60b66d27e655 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4929,6 +4929,110 @@ static void intel_iommu_remove_device(struct device *dev) iommu_device_unlink(iommu->iommu_dev, dev); } +#ifdef CONFIG_INTEL_IOMMU_SVM +int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) +{ + struct device_domain_info *info; + struct context_entry *context; + struct dmar_domain *domain; + unsigned long flags; + u64 ctx_lo; + int ret; + + domain = get_valid_domain_for_dev(sdev->dev); + if (!domain) + return -EINVAL; + + spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&iommu->lock); + + ret = -EINVAL; + info = sdev->dev->archdata.iommu; + if (!info || !info->pasid_supported) + goto out; + + context = iommu_context_addr(iommu, info->bus, info->devfn, 0); + if (WARN_ON(!context)) + goto out; + + ctx_lo = context[0].lo; + + sdev->did = domain->iommu_did[iommu->seq_id]; + sdev->sid = PCI_DEVID(info->bus, info->devfn); + + if (!(ctx_lo & CONTEXT_PASIDE)) { + context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); + context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap); + wmb(); + /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both + * extended to permit requests-with-PASID if the PASIDE bit + * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH, + * however, the PASIDE bit is ignored and requests-with-PASID + * are unconditionally blocked. Which makes less sense. + * So convert from CONTEXT_TT_PASS_THROUGH to one of the new + * "guest mode" translation types depending on whether ATS + * is available or not. Annoyingly, we can't use the new + * modes *unless* PASIDE is set. 
*/ + if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) { + ctx_lo &= ~CONTEXT_TT_MASK; + if (info->ats_supported) + ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2; + else + ctx_lo |= CONTEXT_TT_PT_PASID << 2; + } + ctx_lo |= CONTEXT_PASIDE; + context[0].lo = ctx_lo; + wmb(); + iommu->flush.flush_context(iommu, sdev->did, sdev->sid, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + } + + /* Enable PASID support in the device, if it wasn't already */ + if (!info->pasid_enabled) + iommu_enable_dev_iotlb(info); + + if (info->ats_enabled) { + sdev->dev_iotlb = 1; + sdev->qdep = info->ats_qdep; + if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) + sdev->qdep = 0; + } + ret = 0; + + out: + spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&device_domain_lock, flags); + + return ret; +} + +struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) +{ + struct intel_iommu *iommu; + u8 bus, devfn; + + if (iommu_dummy(dev)) { + dev_warn(dev, + "No IOMMU translation for device; cannot enable SVM\n"); + return NULL; + } + + iommu = device_to_iommu(dev, &bus, &devfn); + if ((!iommu)) { + dev_dbg(dev, "No IOMMU for device; cannot enable SVM\n"); + return NULL; + } + + if (!iommu->pasid_table) { + dev_dbg(dev, "PASID not enabled on IOMMU; cannot enable SVM\n"); + return NULL; + } + + return iommu; +} +#endif /* CONFIG_INTEL_IOMMU_SVM */ + static const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 5b42a95b3f80..82d53e15b865 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -14,6 +14,17 @@ */ #include +#include +#include +#include +#include +#include +#include +#include + +struct pasid_entry { + u64 val; +}; int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) { @@ -42,6 +53,8 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) iommu->name); } + idr_init(&iommu->pasid_idr); + return 0; } @@ -61,5 +74,283 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu) free_pages((unsigned long)iommu->pasid_state_table, order); iommu->pasid_state_table = NULL; } + idr_destroy(&iommu->pasid_idr); return 0; } + +static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, + unsigned long address, int pages, int ih) +{ + struct qi_desc desc; + int mask = ilog2(__roundup_pow_of_two(pages)); + + if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap) || + mask > cap_max_amask_val(svm->iommu->cap)) { + desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; + desc.high = 0; + } else { + desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE; + desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(1) | + QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask); + } + + qi_submit_sync(&desc, svm->iommu); + + if (sdev->dev_iotlb) { + desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) | + QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE; + if (mask) { + unsigned long adr, delta; + + /* Least significant zero bits in the address indicate the + * range of the request. So mask them out according to the + * size. */ + adr = address & ((1<<(VTD_PAGE_SHIFT + mask)) - 1); + + /* Now ensure that we round down further if the original + * request was not aligned w.r.t. 
its size */ + delta = address - adr; + if (delta + (pages << VTD_PAGE_SHIFT) >= (1 << (VTD_PAGE_SHIFT + mask))) + adr &= ~(1 << (VTD_PAGE_SHIFT + mask)); + desc.high = QI_DEV_EIOTLB_ADDR(adr) | QI_DEV_EIOTLB_SIZE; + } else { + desc.high = QI_DEV_EIOTLB_ADDR(address); + } + qi_submit_sync(&desc, svm->iommu); + } +} + +static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, + int pages, int ih) +{ + struct intel_svm_dev *sdev; + + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) + intel_flush_svm_range_dev(svm, sdev, address, pages, ih); + rcu_read_unlock(); +} + +static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long address, pte_t pte) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, address, 1, 1); +} + +static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long address) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, address, 1, 1); +} + +/* Pages have been freed at this point */ +static void intel_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, start, + (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT , 0); +} + + +static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev) +{ + struct qi_desc desc; + + desc.high = 0; + desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(svm->pasid); + + qi_submit_sync(&desc, svm->iommu); +} + +static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + svm->iommu->pasid_table[svm->pasid].val = 0; + + /* There's no need to do any flush because we can't get here if there + * are any devices left anyway. */ + WARN_ON(!list_empty(&svm->devs)); +} + +static const struct mmu_notifier_ops intel_mmuops = { + .release = intel_mm_release, + .change_pte = intel_change_pte, + .invalidate_page = intel_invalidate_page, + .invalidate_range = intel_invalidate_range, +}; + +static DEFINE_MUTEX(pasid_mutex); + +int intel_svm_bind_mm(struct device *dev, int *pasid) +{ + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct intel_svm_dev *sdev; + struct intel_svm *svm = NULL; + int pasid_max; + int ret; + + BUG_ON(pasid && !current->mm); + + if (WARN_ON(!iommu)) + return -EINVAL; + + if (dev_is_pci(dev)) { + pasid_max = pci_max_pasids(to_pci_dev(dev)); + if (pasid_max < 0) + return -EINVAL; + } else + pasid_max = 1 << 20; + + mutex_lock(&pasid_mutex); + if (pasid) { + int i; + + idr_for_each_entry(&iommu->pasid_idr, svm, i) { + if (svm->mm != current->mm) + continue; + + if (svm->pasid >= pasid_max) { + dev_warn(dev, + "Limited PASID width. Cannot use existing PASID %d\n", + svm->pasid); + ret = -ENOSPC; + goto out; + } + + list_for_each_entry(sdev, &svm->devs, list) { + if (dev == sdev->dev) { + sdev->users++; + goto success; + } + } + + break; + } + } + + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!sdev) { + ret = -ENOMEM; + goto out; + } + sdev->dev = dev; + + ret = intel_iommu_enable_pasid(iommu, sdev); + if (ret || !pasid) { + /* If they don't actually want to assign a PASID, this is + * just an enabling check/preparation. 
*/ + kfree(sdev); + goto out; + } + /* Finish the setup now we know we're keeping it */ + sdev->users = 1; + init_rcu_head(&sdev->rcu); + + if (!svm) { + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) { + ret = -ENOMEM; + kfree(sdev); + goto out; + } + svm->iommu = iommu; + + if (pasid_max > 2 << ecap_pss(iommu->ecap)) + pasid_max = 2 << ecap_pss(iommu->ecap); + + ret = idr_alloc(&iommu->pasid_idr, svm, 0, pasid_max - 1, + GFP_KERNEL); + if (ret < 0) { + kfree(svm); + goto out; + } + svm->pasid = ret; + svm->notifier.ops = &intel_mmuops; + svm->mm = get_task_mm(current); + INIT_LIST_HEAD_RCU(&svm->devs); + ret = -ENOMEM; + if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) { + idr_remove(&svm->iommu->pasid_idr, svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } + iommu->pasid_table[svm->pasid].val = (u64)__pa(svm->mm->pgd) | 1; + wmb(); + } + list_add_rcu(&sdev->list, &svm->devs); + + success: + *pasid = svm->pasid; + ret = 0; + out: + mutex_unlock(&pasid_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(intel_svm_bind_mm); + +int intel_svm_unbind_mm(struct device *dev, int pasid) +{ + struct intel_svm_dev *sdev; + struct intel_iommu *iommu; + struct intel_svm *svm; + int ret = -EINVAL; + + mutex_lock(&pasid_mutex); + iommu = intel_svm_device_to_iommu(dev); + if (!iommu || !iommu->pasid_table) + goto out; + + svm = idr_find(&iommu->pasid_idr, pasid); + if (!svm) + goto out; + + list_for_each_entry(sdev, &svm->devs, list) { + if (dev == sdev->dev) { + ret = 0; + sdev->users--; + if (!sdev->users) { + list_del_rcu(&sdev->list); + /* Flush the PASID cache and IOTLB for this device. + * Note that we do depend on the hardware *not* using + * the PASID any more. Just as we depend on other + * devices never using PASIDs that they have no right + * to use. We have a *shared* PASID table, because it's + * large and has to be physically contiguous. So it's + * hard to be as defensive as we might like. */ + intel_flush_pasid_dev(svm, sdev); + intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); + kfree_rcu(sdev, rcu); + + if (list_empty(&svm->devs)) { + mmu_notifier_unregister(&svm->notifier, svm->mm); + + idr_remove(&svm->iommu->pasid_idr, svm->pasid); + mmput(svm->mm); + /* We mandate that no page faults may be outstanding + * for the PASID when intel_svm_unbind_mm() is called. + * If that is not obeyed, subtle errors will happen. + * Let's make them less subtle... */ + memset(svm, 0x6b, sizeof(*svm)); + kfree(svm); + } + } + break; + } + } + out: + mutex_unlock(&pasid_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 7ac17f57250e..0e114bfabeed 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -20,6 +20,13 @@ #define CONTEXT_TT_MULTI_LEVEL 0 #define CONTEXT_TT_DEV_IOTLB 1 #define CONTEXT_TT_PASS_THROUGH 2 +/* Extended context entry types */ +#define CONTEXT_TT_PT_PASID 4 +#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 +#define CONTEXT_TT_MASK (7ULL << 2) + +#define CONTEXT_PRS (1ULL << 9) +#define CONTEXT_PASIDE (1ULL << 11) struct intel_iommu; struct dmar_domain; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0f38e60d40ad..46add607567b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -1,5 +1,9 @@ /* - * Copyright (c) 2006, Intel Corporation. + * Copyright © 2006-2015, Intel Corporation. 
+ * + * Authors: Ashok Raj + * Anil S Keshavamurthy + * David Woodhouse * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -13,10 +17,6 @@ * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 59 Temple * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Copyright (C) 2006-2008 Intel Corporation - * Author: Ashok Raj - * Author: Anil S Keshavamurthy */ #ifndef _INTEL_IOMMU_H_ @@ -25,7 +25,10 @@ #include #include #include +#include #include +#include +#include #include #include @@ -251,6 +254,9 @@ enum { #define QI_DIOTLB_TYPE 0x3 #define QI_IEC_TYPE 0x4 #define QI_IWD_TYPE 0x5 +#define QI_EIOTLB_TYPE 0x6 +#define QI_PC_TYPE 0x7 +#define QI_DEIOTLB_TYPE 0x8 #define QI_IEC_SELECTIVE (((u64)1) << 4) #define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) @@ -278,6 +284,34 @@ enum { #define QI_DEV_IOTLB_SIZE 1 #define QI_DEV_IOTLB_MAX_INVS 32 +#define QI_PC_PASID(pasid) (((u64)pasid) << 32) +#define QI_PC_DID(did) (((u64)did) << 16) +#define QI_PC_GRAN(gran) (((u64)gran) << 4) + +#define QI_PC_ALL_PASIDS (QI_PC_TYPE | QI_PC_GRAN(0)) +#define QI_PC_PASID_SEL (QI_PC_TYPE | QI_PC_GRAN(1)) + +#define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) +#define QI_EIOTLB_GL(gl) (((u64)gl) << 7) +#define QI_EIOTLB_IH(ih) (((u64)ih) << 6) +#define QI_EIOTLB_AM(am) (((u64)am)) +#define QI_EIOTLB_PASID(pasid) (((u64)pasid) << 32) +#define QI_EIOTLB_DID(did) (((u64)did) << 16) +#define QI_EIOTLB_GRAN(gran) (((u64)gran) << 4) + +#define QI_DEV_EIOTLB_ADDR(a) ((u64)(a) & VTD_PAGE_MASK) +#define QI_DEV_EIOTLB_SIZE (((u64)1) << 11) +#define QI_DEV_EIOTLB_GLOB(g) ((u64)g) +#define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) +#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) +#define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16) +#define QI_DEV_EIOTLB_MAX_INVS 32 + +#define QI_GRAN_ALL_ALL 0 +#define QI_GRAN_NONG_ALL 1 +#define QI_GRAN_NONG_PASID 2 +#define QI_GRAN_PSI_PASID 3 + struct qi_desc { u64 low, high; }; @@ -359,6 +393,7 @@ struct intel_iommu { * told to. But while it's all driver-arbitrated, we're fine. */ struct pasid_entry *pasid_table; struct pasid_state_entry *pasid_state_table; + struct idr pasid_idr; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ @@ -399,9 +434,32 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); +#ifdef CONFIG_INTEL_IOMMU_SVM extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); +struct intel_svm_dev { + struct list_head list; + struct rcu_head rcu; + struct device *dev; + int users; + u16 did; + u16 dev_iotlb:1; + u16 sid, qdep; +}; + +struct intel_svm { + struct mmu_notifier notifier; + struct mm_struct *mm; + struct intel_iommu *iommu; + int pasid; + struct list_head devs; +}; + +extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev); +extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev); +#endif + extern const struct attribute_group *intel_iommu_groups[]; #endif diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h new file mode 100644 index 000000000000..42f80ad7a7a0 --- /dev/null +++ b/include/linux/intel-svm.h @@ -0,0 +1,82 @@ +/* + * Copyright © 2015 Intel Corporation. 
+ * + * Authors: David Woodhouse + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __INTEL_SVM_H__ +#define __INTEL_SVM_H__ + +#ifdef CONFIG_INTEL_IOMMU_SVM + +struct device; + +/** + * intel_svm_bind_mm() - Bind the current process to a PASID + * @dev: Device to be granted acccess + * @pasid: Address for allocated PASID + * + * This function attempts to enable PASID support for the given device. + * If the @pasid argument is non-%NULL, a PASID is allocated for access + * to the MM of the current process. + * + * By using a %NULL value for the @pasid argument, this function can + * be used to simply validate that PASID support is available for the + * given device — i.e. that it is behind an IOMMU which has the + * requisite support, and is enabled. + * + * Page faults are handled transparently by the IOMMU code, and there + * should be no need for the device driver to be involved. If a page + * fault cannot be handled (i.e. is an invalid address rather than + * just needs paging in), then the page request will be completed by + * the core IOMMU code with appropriate status, and the device itself + * can then report the resulting fault to its driver via whatever + * mechanism is appropriate. + * + * Multiple calls from the same process may result in the same PASID + * being re-used. A reference count is kept. + */ +extern int intel_svm_bind_mm(struct device *dev, int *pasid); + +/** + * intel_svm_unbind_mm() - Unbind a specified PASID + * @dev: Device for which PASID was allocated + * @pasid: PASID value to be unbound + * + * This function allows a PASID to be retired when the device no + * longer requires access to the address space of a given process. + * + * If the use count for the PASID in question reaches zero, the + * PASID is revoked and may no longer be used by hardware. + * + * Device drivers are required to ensure that no access (including + * page requests) is currently outstanding for the PASID in question, + * before calling this function. 
+ */ +extern int intel_svm_unbind_mm(struct device *dev, int pasid); + +#else /* CONFIG_INTEL_IOMMU_SVM */ + +static inline int intel_svm_bind_mm(struct device *dev, int *pasid) +{ + return -ENOSYS; +} + +static inline int intel_svm_unbind_mm(struct device *dev, int pasid) +{ + BUG(); +} +#endif /* CONFIG_INTEL_IOMMU_SVM */ + +#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL)) + +#endif /* __INTEL_SVM_H__ */ -- cgit v1.2.3 From 907fea3491d52063bb37b1f1ce0cf8a4ae70944c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 13 Oct 2015 14:11:13 +0100 Subject: iommu/vt-d: Implement deferred invalidate for SVM Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 2 ++ drivers/iommu/intel-svm.c | 9 +++++++++ include/linux/dma_remapping.h | 1 + 3 files changed, 12 insertions(+) (limited to 'include') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 60b66d27e655..58ecd52af43b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4981,6 +4981,8 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd ctx_lo |= CONTEXT_TT_PT_PASID << 2; } ctx_lo |= CONTEXT_PASIDE; + if (iommu->pasid_state_table) + ctx_lo |= CONTEXT_DINVE; context[0].lo = ctx_lo; wmb(); iommu->flush.flush_context(iommu, sdev->did, sdev->sid, diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 82d53e15b865..a64720b5bd34 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -26,6 +26,10 @@ struct pasid_entry { u64 val; }; +struct pasid_state_entry { + u64 val; +}; + int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) { struct page *pages; @@ -127,6 +131,11 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, { struct intel_svm_dev *sdev; + /* Try deferred invalidate if available */ + if (svm->iommu->pasid_state_table && + !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63)) + return; + rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) intel_flush_svm_range_dev(svm, sdev, address, pages, ih); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 0e114bfabeed..187c10299722 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -25,6 +25,7 @@ #define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 #define CONTEXT_TT_MASK (7ULL << 2) +#define CONTEXT_DINVE (1ULL << 8) #define CONTEXT_PRS (1ULL << 9) #define CONTEXT_PASIDE (1ULL << 11) -- cgit v1.2.3 From 1208225cf48fa3b170b6dfe7369f15c295260755 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 7 Oct 2015 15:37:03 +0100 Subject: iommu/vt-d: Generalise DMAR MSI setup to allow for page request events Signed-off-by: David Woodhouse --- drivers/iommu/dmar.c | 42 +++++++++++++++++++++++++++++++----------- include/linux/intel-iommu.h | 10 +++++++++- 2 files changed, 40 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 8757f8dfc4e5..80e3c176008e 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1086,6 +1086,11 @@ static void free_iommu(struct intel_iommu *iommu) iommu_device_destroy(iommu->iommu_dev); if (iommu->irq) { + if (iommu->pr_irq) { + free_irq(iommu->pr_irq, iommu); + dmar_free_hwirq(iommu->pr_irq); + iommu->pr_irq = 0; + } free_irq(iommu->irq, iommu); dmar_free_hwirq(iommu->irq); iommu->irq = 0; @@ -1493,53 +1498,68 @@ static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) } } + +static inline int dmar_msi_reg(struct 
intel_iommu *iommu, int irq) +{ + if (iommu->irq == irq) + return DMAR_FECTL_REG; + else if (iommu->pr_irq == irq) + return DMAR_PECTL_REG; + else + BUG(); +} + void dmar_msi_unmask(struct irq_data *data) { struct intel_iommu *iommu = irq_data_get_irq_handler_data(data); + int reg = dmar_msi_reg(iommu, data->irq); unsigned long flag; /* unmask it */ raw_spin_lock_irqsave(&iommu->register_lock, flag); - writel(0, iommu->reg + DMAR_FECTL_REG); + writel(0, iommu->reg + reg); /* Read a reg to force flush the post write */ - readl(iommu->reg + DMAR_FECTL_REG); + readl(iommu->reg + reg); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_mask(struct irq_data *data) { - unsigned long flag; struct intel_iommu *iommu = irq_data_get_irq_handler_data(data); + int reg = dmar_msi_reg(iommu, data->irq); + unsigned long flag; /* mask it */ raw_spin_lock_irqsave(&iommu->register_lock, flag); - writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); + writel(DMA_FECTL_IM, iommu->reg + reg); /* Read a reg to force flush the post write */ - readl(iommu->reg + DMAR_FECTL_REG); + readl(iommu->reg + reg); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_write(int irq, struct msi_msg *msg) { struct intel_iommu *iommu = irq_get_handler_data(irq); + int reg = dmar_msi_reg(iommu, irq); unsigned long flag; raw_spin_lock_irqsave(&iommu->register_lock, flag); - writel(msg->data, iommu->reg + DMAR_FEDATA_REG); - writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); - writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); + writel(msg->data, iommu->reg + reg + 4); + writel(msg->address_lo, iommu->reg + reg + 8); + writel(msg->address_hi, iommu->reg + reg + 12); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_read(int irq, struct msi_msg *msg) { struct intel_iommu *iommu = irq_get_handler_data(irq); + int reg = dmar_msi_reg(iommu, irq); unsigned long flag; raw_spin_lock_irqsave(&iommu->register_lock, flag); - msg->data = readl(iommu->reg + DMAR_FEDATA_REG); - msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); - msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); + msg->data = readl(iommu->reg + reg + 4); + msg->address_lo = readl(iommu->reg + reg + 8); + msg->address_hi = readl(iommu->reg + reg + 12); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 46add607567b..f16a2b9124d1 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -60,6 +60,14 @@ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ #define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ +#define DMAR_PQH_REG 0xc0 /* Page request queue head register */ +#define DMAR_PQT_REG 0xc8 /* Page request queue tail register */ +#define DMAR_PQA_REG 0xd0 /* Page request queue address register */ +#define DMAR_PRS_REG 0xdc /* Page request status register */ +#define DMAR_PECTL_REG 0xe0 /* Page request event control register */ +#define DMAR_PEDATA_REG 0xe4 /* Page request event interrupt data register */ +#define DMAR_PEADDR_REG 0xe8 /* Page request event interrupt addr register */ +#define DMAR_PEUADDR_REG 0xec /* Page request event Upper address register */ #define OFFSET_STRIDE (9) @@ -373,7 +381,7 @@ struct intel_iommu { int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ int msagaw; /* max sagaw of this iommu */ - unsigned int irq; + unsigned int irq, 
pr_irq; u16 segment; /* PCI segment# */ unsigned char name[13]; /* Device Name */ -- cgit v1.2.3 From a222a7f0bb6c94c31cc9c755110593656f19de89 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 7 Oct 2015 23:35:18 +0100 Subject: iommu/vt-d: Implement page request handling Largely based on the driver-mode implementation by Jesse Barnes. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 22 +++++- drivers/iommu/intel-svm.c | 173 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/intel-iommu.h | 26 +++++++ 3 files changed, 220 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 58ecd52af43b..68d71f81dfa0 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1698,8 +1698,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu) free_context_table(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) + if (pasid_enabled(iommu)) { + if (ecap_prs(iommu->ecap)) + intel_svm_finish_prq(iommu); intel_svm_free_pasid_tables(iommu); + } #endif } @@ -3243,6 +3246,13 @@ domains_done: iommu_flush_write_buffer(iommu); +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + ret = intel_svm_enable_prq(iommu); + if (ret) + goto free_iommu; + } +#endif ret = dmar_set_interrupt(iommu); if (ret) goto free_iommu; @@ -4187,6 +4197,14 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) intel_iommu_init_qi(iommu); iommu_flush_write_buffer(iommu); + +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + ret = intel_svm_enable_prq(iommu); + if (ret) + goto disable_iommu; + } +#endif ret = dmar_set_interrupt(iommu); if (ret) goto disable_iommu; @@ -4983,6 +5001,8 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd ctx_lo |= CONTEXT_PASIDE; if (iommu->pasid_state_table) ctx_lo |= CONTEXT_DINVE; + if (info->pri_supported) + ctx_lo |= CONTEXT_PRS; context[0].lo = ctx_lo; wmb(); iommu->flush.flush_context(iommu, sdev->did, sdev->sid, diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index a64720b5bd34..0e8654282484 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -21,6 +21,10 @@ #include #include #include +#include +#include + +static irqreturn_t prq_event_thread(int irq, void *d); struct pasid_entry { u64 val; @@ -82,6 +86,66 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu) return 0; } +#define PRQ_ORDER 0 + +int intel_svm_enable_prq(struct intel_iommu *iommu) +{ + struct page *pages; + int irq, ret; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); + if (!pages) { + pr_warn("IOMMU: %s: Failed to allocate page request queue\n", + iommu->name); + return -ENOMEM; + } + iommu->prq = page_address(pages); + + irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu); + if (irq <= 0) { + pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", + iommu->name); + ret = -EINVAL; + err: + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + return ret; + } + iommu->pr_irq = irq; + + snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id); + + ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT, + iommu->prq_name, iommu); + if (ret) { + pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", + iommu->name); + dmar_free_hwirq(irq); + goto err; + } + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); + 
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER); + + return 0; +} + +int intel_svm_finish_prq(struct intel_iommu *iommu) +{ + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL); + + free_irq(iommu->pr_irq, iommu); + dmar_free_hwirq(iommu->pr_irq); + iommu->pr_irq = 0; + + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + + return 0; +} + static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, unsigned long address, int pages, int ih) { @@ -363,3 +427,112 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) return ret; } EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); + +/* Page request queue descriptor */ +struct page_req_dsc { + u64 srr:1; + u64 bof:1; + u64 pasid_present:1; + u64 lpig:1; + u64 pasid:20; + u64 bus:8; + u64 private:23; + u64 prg_index:9; + u64 rd_req:1; + u64 wr_req:1; + u64 exe_req:1; + u64 priv_req:1; + u64 devfn:8; + u64 addr:52; +}; + +#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10) +static irqreturn_t prq_event_thread(int irq, void *d) +{ + struct intel_iommu *iommu = d; + struct intel_svm *svm = NULL; + int head, tail, handled = 0; + + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + while (head != tail) { + struct vm_area_struct *vma; + struct page_req_dsc *req; + struct qi_desc resp; + int ret, result; + u64 address; + + handled = 1; + + req = &iommu->prq[head / sizeof(*req)]; + + result = QI_RESP_FAILURE; + address = req->addr << PAGE_SHIFT; + if (!req->pasid_present) { + pr_err("%s: Page request without PASID: %08llx %08llx\n", + iommu->name, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + goto bad_req; + } + + if (!svm || svm->pasid != req->pasid) { + rcu_read_lock(); + svm = idr_find(&iommu->pasid_idr, req->pasid); + /* It *can't* go away, because the driver is not permitted + * to unbind the mm while any page faults are outstanding. + * So we only need RCU to protect the internal idr code. */ + rcu_read_unlock(); + + if (!svm) { + pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n", + iommu->name, req->pasid, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + goto bad_req; + } + } + + result = QI_RESP_INVALID; + down_read(&svm->mm->mmap_sem); + vma = find_extend_vma(svm->mm, address); + if (!vma || address < vma->vm_start) + goto invalid; + + ret = handle_mm_fault(svm->mm, vma, address, + req->wr_req ? FAULT_FLAG_WRITE : 0); + if (ret & VM_FAULT_ERROR) + goto invalid; + + result = QI_RESP_SUCCESS; + invalid: + up_read(&svm->mm->mmap_sem); + bad_req: + /* Accounting for major/minor faults? 
*/ + + if (req->lpig) { + /* Page Group Response */ + resp.low = QI_PGRP_PASID(req->pasid) | + QI_PGRP_DID((req->bus << 8) | req->devfn) | + QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_RESP_TYPE; + resp.high = QI_PGRP_IDX(req->prg_index) | + QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result); + + qi_submit_sync(&resp, svm->iommu); + } else if (req->srr) { + /* Page Stream Response */ + resp.low = QI_PSTRM_IDX(req->prg_index) | + QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) | + QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE; + resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) | + QI_PSTRM_RESP_CODE(result); + + qi_submit_sync(&resp, svm->iommu); + } + + head = (head + sizeof(*req)) & PRQ_RING_MASK; + } + + dmar_writeq(iommu->reg + DMAR_PQH_REG, tail); + + return IRQ_RETVAL(handled); +} diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index f16a2b9124d1..e5b80d31eb1b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -265,6 +265,8 @@ enum { #define QI_EIOTLB_TYPE 0x6 #define QI_PC_TYPE 0x7 #define QI_DEIOTLB_TYPE 0x8 +#define QI_PGRP_RESP_TYPE 0x9 +#define QI_PSTRM_RESP_TYPE 0xa #define QI_IEC_SELECTIVE (((u64)1) << 4) #define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) @@ -315,6 +317,25 @@ enum { #define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16) #define QI_DEV_EIOTLB_MAX_INVS 32 +#define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) +#define QI_PGRP_PRIV(priv) (((u64)(priv)) << 32) +#define QI_PGRP_RESP_CODE(res) ((u64)(res)) +#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) +#define QI_PGRP_DID(did) (((u64)(did)) << 16) +#define QI_PGRP_PASID_P(p) (((u64)(p)) << 4) + +#define QI_PSTRM_ADDR(addr) (((u64)(addr)) & VTD_PAGE_MASK) +#define QI_PSTRM_DEVFN(devfn) (((u64)(devfn)) << 4) +#define QI_PSTRM_RESP_CODE(res) ((u64)(res)) +#define QI_PSTRM_IDX(idx) (((u64)(idx)) << 55) +#define QI_PSTRM_PRIV(priv) (((u64)(priv)) << 32) +#define QI_PSTRM_BUS(bus) (((u64)(bus)) << 24) +#define QI_PSTRM_PASID(pasid) (((u64)(pasid)) << 4) + +#define QI_RESP_SUCCESS 0x0 +#define QI_RESP_INVALID 0x1 +#define QI_RESP_FAILURE 0xf + #define QI_GRAN_ALL_ALL 0 #define QI_GRAN_NONG_ALL 1 #define QI_GRAN_NONG_PASID 2 @@ -369,6 +390,7 @@ enum { struct pasid_entry; struct pasid_state_entry; +struct page_req_dsc; struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ @@ -401,6 +423,8 @@ struct intel_iommu { * told to. But while it's all driver-arbitrated, we're fine. 
*/ struct pasid_entry *pasid_table; struct pasid_state_entry *pasid_state_table; + struct page_req_dsc *prq; + unsigned char prq_name[16]; /* Name for PRQ interrupt */ struct idr pasid_idr; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ @@ -445,6 +469,8 @@ extern int dmar_ir_support(void); #ifdef CONFIG_INTEL_IOMMU_SVM extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); +extern int intel_svm_enable_prq(struct intel_iommu *iommu); +extern int intel_svm_finish_prq(struct intel_iommu *iommu); struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; int users; u16 did; u16 dev_iotlb:1; u16 sid, qdep; }; -- cgit v1.2.3 From 0204a49609824163092c32a8aeb073f7e9acc76d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 13 Oct 2015 17:18:10 +0100 Subject: iommu/vt-d: Add callback to device driver on page faults Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 26 +++++++++++++++++++++++++- include/linux/intel-iommu.h | 3 +++ include/linux/intel-svm.h | 21 ++++++++++++++++++--- 3 files changed, 46 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 0e8654282484..006e95dd64ae 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -264,7 +264,7 @@ static const struct mmu_notifier_ops intel_mmuops = { static DEFINE_MUTEX(pasid_mutex); -int intel_svm_bind_mm(struct device *dev, int *pasid) +int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); struct intel_svm_dev *sdev; @@ -302,6 +302,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid) list_for_each_entry(sdev, &svm->devs, list) { if (dev == sdev->dev) { + if (sdev->ops != ops) { + ret = -EBUSY; + goto out; + } sdev->users++; goto success; } @@ -327,6 +331,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid) } /* Finish the setup now we know we're keeping it */ sdev->users = 1; + sdev->ops = ops; init_rcu_head(&sdev->rcu); if (!svm) { @@ -456,6 +461,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; while (head != tail) { + struct intel_svm_dev *sdev; struct vm_area_struct *vma; struct page_req_dsc *req; struct qi_desc resp; @@ -507,6 +513,24 @@ static irqreturn_t prq_event_thread(int irq, void *d) up_read(&svm->mm->mmap_sem); bad_req: /* Accounting for major/minor faults? */ + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) { + if (sdev->sid == PCI_DEVID(req->bus, req->devfn)) + break; + } + /* Other devices can go away, but the drivers are not permitted + * to unbind while any page faults might be in flight. So it's + * OK to drop the 'lock' here now we have it.
*/ + rcu_read_unlock(); + + if (WARN_ON(&sdev->list == &svm->devs)) + sdev = NULL; + + if (sdev && sdev->ops && sdev->ops->fault_cb) { + int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | + (req->exe_req << 1) | (req->priv_req); + sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result); + } if (req->lpig) { /* Page Group Response */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e5b80d31eb1b..57be14fce640 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -472,10 +472,13 @@ extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); +struct svm_dev_ops; + struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; + struct svm_dev_ops *ops; int users; u16 did; u16 dev_iotlb:1; u16 sid, qdep; }; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 42f80ad7a7a0..239f8a13a332 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -20,10 +20,23 @@ struct device; +struct svm_dev_ops { + void (*fault_cb)(struct device *dev, int pasid, u64 address, + u32 private, int rwxp, int response); +}; + +/* Values for rxwp in fault_cb callback */ +#define SVM_REQ_READ (1<<3) +#define SVM_REQ_WRITE (1<<2) +#define SVM_REQ_EXEC (1<<1) +#define SVM_REQ_PRIV (1<<0) + /** * intel_svm_bind_mm() - Bind the current process to a PASID * @dev: Device to be granted acccess * @pasid: Address for allocated PASID + * @flags: Flags. Later for requesting supervisor mode, etc. + * @ops: Callbacks to device driver * * This function attempts to enable PASID support for the given device. * If the @pasid argument is non-%NULL, a PASID is allocated for access @@ -45,7 +58,8 @@ struct device; * Multiple calls from the same process may result in the same PASID * being re-used. A reference count is kept.
*/ -extern int intel_svm_bind_mm(struct device *dev, int *pasid); +extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, + struct svm_dev_ops *ops); /** * intel_svm_unbind_mm() - Unbind a specified PASID @@ -66,7 +80,8 @@ extern int intel_svm_unbind_mm(struct device *dev, int pasid); #else /* CONFIG_INTEL_IOMMU_SVM */ -static inline int intel_svm_bind_mm(struct device *dev, int *pasid) +static inline int intel_svm_bind_mm(struct device *dev, int *pasid, + int flags, struct svm_dev_ops *ops) { return -ENOSYS; } @@ -77,6 +92,6 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid) } #endif /* CONFIG_INTEL_IOMMU_SVM */ -#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL)) +#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL)) #endif /* __INTEL_SVM_H__ */ -- cgit v1.2.3 From 569e4f7782fb92d0e1b395b5fb01de642dd74dcf Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Oct 2015 13:59:14 +0100 Subject: iommu/vt-d: Implement SVM_FLAG_PRIVATE_PASID to allocate unique PASIDs Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 6 ++++-- include/linux/intel-iommu.h | 1 + include/linux/intel-svm.h | 11 +++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 006e95dd64ae..89d4d47d0ab3 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -285,11 +285,12 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ pasid_max = 1 << 20; mutex_lock(&pasid_mutex); - if (pasid) { + if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { int i; idr_for_each_entry(&iommu->pasid_idr, svm, i) { - if (svm->mm != current->mm) + if (svm->mm != current->mm || + (svm->flags & SVM_FLAG_PRIVATE_PASID)) continue; if (svm->pasid >= pasid_max) { @@ -355,6 +356,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ svm->pasid = ret; svm->notifier.ops = &intel_mmuops; svm->mm = get_task_mm(current); + svm->flags = flags; INIT_LIST_HEAD_RCU(&svm->devs); ret = -ENOMEM; if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 57be14fce640..821273ca4873 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -489,6 +489,7 @@ struct intel_svm { struct mmu_notifier notifier; struct mm_struct *mm; struct intel_iommu *iommu; + int flags; int pasid; struct list_head devs; }; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 239f8a13a332..dd94ab45a4db 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -31,6 +31,17 @@ struct svm_dev_ops { #define SVM_REQ_EXEC (1<<1) #define SVM_REQ_PRIV (1<<0) + +/* + * The SVM_FLAG_PRIVATE_PASID flag requests a PASID which is *not* the "main" + * PASID for the current process. Even if a PASID already exists, a new one + * will be allocated. And the PASID allocated with SVM_FLAG_PRIVATE_PASID + * will not be given to subsequent callers. This facility allows a driver to + * disambiguate between multiple device contexts which access the same MM, + * if there is no other way to do so. It should be used sparingly, if at all. 
+ */ +#define SVM_FLAG_PRIVATE_PASID (1<<0) + /** * intel_svm_bind_mm() - Bind the current process to a PASID * @dev: Device to be granted acccess -- cgit v1.2.3 From 5cec753709adf1a20c8b15edf8e5245cf4fd4e82 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Oct 2015 15:52:15 +0100 Subject: iommu/vt-d: Implement SVM_FLAG_SUPERVISOR_MODE for kernel access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is only usable for the static 1:1 mapping of physical memory. Any access to vmalloc or module regions will require some way of doing an IOTLB flush. It's theoretically possible to hook into the tlb_flush_kernel_range() function, but that seems like overkill — most of the addresses accessed through a kernel PASID *will* be in the 1:1 mapping. If we really need to allow access to more interesting kernel regions, then the answer will probably be an explicit IOTLB flush call after use, akin to the DMA API's unmap function. In fact, it might be worth introducing that sooner rather than later, and making it just BUG() if the address isn't in the static 1:1 mapping. Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 43 +++++++++++++++++++++++++++++++------------ include/linux/intel-svm.h | 15 ++++++++++++++- 2 files changed, 45 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 89d4d47d0ab3..817be769e94f 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -269,11 +269,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; + struct mm_struct *mm = NULL; int pasid_max; int ret; - BUG_ON(pasid && !current->mm); - if (WARN_ON(!iommu)) return -EINVAL; @@ -284,12 +283,20 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } else pasid_max = 1 << 20; + if ((flags & SVM_FLAG_SUPERVISOR_MODE)) { + if (!ecap_srs(iommu->ecap)) + return -EINVAL; + } else if (pasid) { + mm = get_task_mm(current); + BUG_ON(!mm); + } + mutex_lock(&pasid_mutex); if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { int i; idr_for_each_entry(&iommu->pasid_idr, svm, i) { - if (svm->mm != current->mm || + if (svm->mm != mm || (svm->flags & SVM_FLAG_PRIVATE_PASID)) continue; @@ -355,17 +362,22 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } svm->pasid = ret; svm->notifier.ops = &intel_mmuops; - svm->mm = get_task_mm(current); + svm->mm = mm; svm->flags = flags; INIT_LIST_HEAD_RCU(&svm->devs); ret = -ENOMEM; - if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) { - idr_remove(&svm->iommu->pasid_idr, svm->pasid); - kfree(svm); - kfree(sdev); - goto out; - } - iommu->pasid_table[svm->pasid].val = (u64)__pa(svm->mm->pgd) | 1; + if (mm) { + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + idr_remove(&svm->iommu->pasid_idr, svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } + iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1; + mm = NULL; + } else + iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11); wmb(); } list_add_rcu(&sdev->list, &svm->devs); @@ -375,6 +387,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ ret = 0; out: mutex_unlock(&pasid_mutex); + if (mm) + mmput(mm); return ret; } EXPORT_SYMBOL_GPL(intel_svm_bind_mm); @@ -416,7 +430,8 @@ int 
intel_svm_unbind_mm(struct device *dev, int pasid) mmu_notifier_unregister(&svm->notifier, svm->mm); idr_remove(&svm->iommu->pasid_idr, svm->pasid); - mmput(svm->mm); + if (svm->mm) + mmput(svm->mm); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. * If that is not obeyed, subtle errors will happen. @@ -500,6 +515,10 @@ static irqreturn_t prq_event_thread(int irq, void *d) } result = QI_RESP_INVALID; + /* Since we're using init_mm.pgd directly, we should never take + * any faults on kernel addresses. */ + if (!svm->mm) + goto bad_req; down_read(&svm->mm->mmap_sem); vma = find_extend_vma(svm->mm, address); if (!vma || address < vma->vm_start) diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index dd94ab45a4db..0a48ccff24ae 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -40,7 +40,20 @@ struct svm_dev_ops { * disambiguate between multiple device contexts which access the same MM, * if there is no other way to do so. It should be used sparingly, if at all. */ -#define SVM_FLAG_PRIVATE_PASID (1<<0) +#define SVM_FLAG_PRIVATE_PASID (1<<0) + +/* + * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only + * for access to kernel addresses. No IOTLB flushes are automatically done + * for kernel mappings; it is valid only for access to the kernel's static + * 1:1 mapping of physical memory — not to vmalloc or even module mappings. + * A future API addition may permit the use of such ranges, by means of an + * explicit IOTLB flush call (akin to the DMA API's unmap method). + * + * It is unlikely that we will ever hook into flush_tlb_kernel_range() to + * do such IOTLB flushes automatically. + */ +#define SVM_FLAG_SUPERVISOR_MODE (1<<1) /** * intel_svm_bind_mm() - Bind the current process to a PASID -- cgit v1.2.3 From da4689c0263ee5f4eee64e166a6bee6a68b9242e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Oct 2015 08:36:08 +0900 Subject: iommu/vt-d: Expose struct svm_dev_ops without CONFIG_INTEL_IOMMU_SVM The point in providing an inline version of intel_svm_bind_mm() which just returns -ENOSYS is that people are supposed to be able to *use* it and just see that it fails. So we need to let them have a definition of struct svm_dev_ops (and the flags) too. Signed-off-by: David Woodhouse --- include/linux/intel-svm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 0a48ccff24ae..3c25794042f9 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -16,8 +16,6 @@ #ifndef __INTEL_SVM_H__ #define __INTEL_SVM_H__ -#ifdef CONFIG_INTEL_IOMMU_SVM - struct device; struct svm_dev_ops { @@ -55,6 +53,8 @@ struct svm_dev_ops { */ #define SVM_FLAG_SUPERVISOR_MODE (1<<1) +#ifdef CONFIG_INTEL_IOMMU_SVM + /** * intel_svm_bind_mm() - Bind the current process to a PASID * @dev: Device to be granted acccess -- cgit v1.2.3
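Taken together, the series leaves drivers with intel_svm_bind_mm(), intel_svm_unbind_mm(), intel_svm_available() and struct svm_dev_ops as the public interface in <linux/intel-svm.h>. A minimal usage sketch against those final signatures (the my_* names and the callback body are hypothetical, shown only for illustration; the QI_RESP_* values come from <linux/intel-iommu.h>):

    /* Hypothetical consumer of the final interface (sketch only). */
    static void my_fault_cb(struct device *dev, int pasid, u64 address,
                            u32 private, int rwxp, int response)
    {
            /* Called from prq_event_thread() after a page request for this
             * device is serviced; response is QI_RESP_SUCCESS/INVALID/FAILURE. */
            if (response != QI_RESP_SUCCESS)
                    dev_warn(dev, "PASID %d fault at %llx (rwxp %x, response %x)\n",
                             pasid, address, rwxp, response);
    }

    static struct svm_dev_ops my_svm_ops = {
            .fault_cb = my_fault_cb,
    };

    int my_dev_bind_current_mm(struct device *dev, int *pasid)
    {
            if (!intel_svm_available(dev))
                    return -ENODEV;

            /* Flags: 0 binds current->mm; SVM_FLAG_SUPERVISOR_MODE instead
             * requests a PASID for the kernel's static 1:1 mapping, and
             * SVM_FLAG_PRIVATE_PASID forces a fresh PASID for this mm. */
            return intel_svm_bind_mm(dev, pasid, 0, &my_svm_ops);
    }

    void my_dev_unbind(struct device *dev, int pasid)
    {
            /* The driver must ensure no page requests are still in flight. */
            intel_svm_unbind_mm(dev, pasid);
    }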