Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r--  drivers/iommu/intel/Kconfig   |   6
-rw-r--r--  drivers/iommu/intel/iommu.c   | 931
-rw-r--r--  drivers/iommu/intel/iommu.h   |  99
-rw-r--r--  drivers/iommu/intel/nested.c  |   7
-rw-r--r--  drivers/iommu/intel/pasid.c   |  44
-rw-r--r--  drivers/iommu/intel/pasid.h   |   1
-rw-r--r--  drivers/iommu/intel/svm.c     |   1
7 files changed, 218 insertions(+), 871 deletions(-)
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index f2f538c70650..5471f814e073 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -13,6 +13,10 @@ config INTEL_IOMMU
bool "Support for Intel IOMMU using DMA Remapping Devices"
depends on PCI_MSI && ACPI && X86
select IOMMU_API
+ select GENERIC_PT
+ select IOMMU_PT
+ select IOMMU_PT_X86_64
+ select IOMMU_PT_VTDSS
select IOMMU_IOVA
select IOMMU_IOPF
select IOMMUFD_DRIVER if IOMMUFD
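# Annotation, not part of the patch: what the new selects provide.
# GENERIC_PT is the generic page-table core, IOMMU_PT its IOMMU
# integration layer, and the last two are the table formats this driver
# now instantiates: IOMMU_PT_X86_64 for first-stage translation and
# IOMMU_PT_VTDSS for the VT-d second-stage format.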
@@ -66,7 +70,7 @@ config INTEL_IOMMU_DEFAULT_ON
config INTEL_IOMMU_FLOPPY_WA
def_bool y
- depends on X86
+ depends on X86 && BLK_DEV_FD
help
Floppy disk drivers are known to bypass DMA API calls
thereby failing to work when IOMMU is enabled. This
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index e236c7ec221f..4e888867e85c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -45,16 +45,9 @@
#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
-#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
-#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
-
-/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
- to match. That way, we can use 'unsigned long' for PFNs with impunity. */
-#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
- __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
-#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
-
static void __init check_tylersburg_isoch(void);
+static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
+ bool enable);
static int rwbf_quirk;
#define rwbf_required(iommu) (rwbf_quirk || cap_rwbf((iommu)->cap))
@@ -217,7 +210,6 @@ static int disable_igfx_iommu;
#define IDENTMAP_AZALIA 4
const struct iommu_ops intel_iommu_ops;
-static const struct iommu_dirty_ops intel_dirty_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
{
@@ -285,13 +277,6 @@ static int __init intel_iommu_setup(char *str)
}
__setup("intel_iommu=", intel_iommu_setup);
-static int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn)
-{
- int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
-
- return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
-}
-
/*
* Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
* Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
@@ -353,23 +338,6 @@ static bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
}
-/* Return the super pagesize bitmap if supported. */
-static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
-{
- unsigned long bitmap = 0;
-
- /*
- * 1-level super page supports page size of 2MiB, 2-level super page
- * supports page size of both 2MiB and 1GiB.
- */
- if (domain->iommu_superpage == 1)
- bitmap |= SZ_2M;
- else if (domain->iommu_superpage == 2)
- bitmap |= SZ_2M | SZ_1G;
-
- return bitmap;
-}
-
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
u8 devfn, int alloc)
{
@@ -556,13 +524,6 @@ out:
return iommu;
}
-static void domain_flush_cache(struct dmar_domain *domain,
- void *addr, int size)
-{
- if (!domain->iommu_coherency)
- clflush_cache_range(addr, size);
-}
-
static void free_context_table(struct intel_iommu *iommu)
{
struct context_entry *context;
@@ -707,280 +668,6 @@ pgtable_walk:
}
#endif
-static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
- unsigned long pfn, int *target_level,
- gfp_t gfp)
-{
- struct dma_pte *parent, *pte;
- int level = agaw_to_level(domain->agaw);
- int offset;
-
- if (!domain_pfn_supported(domain, pfn))
- /* Address beyond IOMMU's addressing capabilities. */
- return NULL;
-
- parent = domain->pgd;
-
- while (1) {
- void *tmp_page;
-
- offset = pfn_level_offset(pfn, level);
- pte = &parent[offset];
- if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
- break;
- if (level == *target_level)
- break;
-
- if (!dma_pte_present(pte)) {
- uint64_t pteval, tmp;
-
- tmp_page = iommu_alloc_pages_node_sz(domain->nid, gfp,
- SZ_4K);
-
- if (!tmp_page)
- return NULL;
-
- domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
- pteval = virt_to_phys(tmp_page) | DMA_PTE_READ |
- DMA_PTE_WRITE;
- if (domain->use_first_level)
- pteval |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
-
- tmp = 0ULL;
- if (!try_cmpxchg64(&pte->val, &tmp, pteval))
- /* Someone else set it while we were thinking; use theirs. */
- iommu_free_pages(tmp_page);
- else
- domain_flush_cache(domain, pte, sizeof(*pte));
- }
- if (level == 1)
- break;
-
- parent = phys_to_virt(dma_pte_addr(pte));
- level--;
- }
-
- if (!*target_level)
- *target_level = level;
-
- return pte;
-}
-
-/* return address's pte at specific level */
-static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
- unsigned long pfn,
- int level, int *large_page)
-{
- struct dma_pte *parent, *pte;
- int total = agaw_to_level(domain->agaw);
- int offset;
-
- parent = domain->pgd;
- while (level <= total) {
- offset = pfn_level_offset(pfn, total);
- pte = &parent[offset];
- if (level == total)
- return pte;
-
- if (!dma_pte_present(pte)) {
- *large_page = total;
- break;
- }
-
- if (dma_pte_superpage(pte)) {
- *large_page = total;
- return pte;
- }
-
- parent = phys_to_virt(dma_pte_addr(pte));
- total--;
- }
- return NULL;
-}
-
-/* clear last level pte, a tlb flush should be followed */
-static void dma_pte_clear_range(struct dmar_domain *domain,
- unsigned long start_pfn,
- unsigned long last_pfn)
-{
- unsigned int large_page;
- struct dma_pte *first_pte, *pte;
-
- if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
- WARN_ON(start_pfn > last_pfn))
- return;
-
- /* we don't need lock here; nobody else touches the iova range */
- do {
- large_page = 1;
- first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
- if (!pte) {
- start_pfn = align_to_level(start_pfn + 1, large_page + 1);
- continue;
- }
- do {
- dma_clear_pte(pte);
- start_pfn += lvl_to_nr_pages(large_page);
- pte++;
- } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
-
- domain_flush_cache(domain, first_pte,
- (void *)pte - (void *)first_pte);
-
- } while (start_pfn && start_pfn <= last_pfn);
-}
-
-static void dma_pte_free_level(struct dmar_domain *domain, int level,
- int retain_level, struct dma_pte *pte,
- unsigned long pfn, unsigned long start_pfn,
- unsigned long last_pfn)
-{
- pfn = max(start_pfn, pfn);
- pte = &pte[pfn_level_offset(pfn, level)];
-
- do {
- unsigned long level_pfn;
- struct dma_pte *level_pte;
-
- if (!dma_pte_present(pte) || dma_pte_superpage(pte))
- goto next;
-
- level_pfn = pfn & level_mask(level);
- level_pte = phys_to_virt(dma_pte_addr(pte));
-
- if (level > 2) {
- dma_pte_free_level(domain, level - 1, retain_level,
- level_pte, level_pfn, start_pfn,
- last_pfn);
- }
-
- /*
- * Free the page table if we're below the level we want to
- * retain and the range covers the entire table.
- */
- if (level < retain_level && !(start_pfn > level_pfn ||
- last_pfn < level_pfn + level_size(level) - 1)) {
- dma_clear_pte(pte);
- domain_flush_cache(domain, pte, sizeof(*pte));
- iommu_free_pages(level_pte);
- }
-next:
- pfn += level_size(level);
- } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
-}
-
-/*
- * clear last level (leaf) ptes and free page table pages below the
- * level we wish to keep intact.
- */
-static void dma_pte_free_pagetable(struct dmar_domain *domain,
- unsigned long start_pfn,
- unsigned long last_pfn,
- int retain_level)
-{
- dma_pte_clear_range(domain, start_pfn, last_pfn);
-
- /* We don't need lock here; nobody else touches the iova range */
- dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
- domain->pgd, 0, start_pfn, last_pfn);
-
- /* free pgd */
- if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
- iommu_free_pages(domain->pgd);
- domain->pgd = NULL;
- }
-}
-
-/* When a page at a given level is being unlinked from its parent, we don't
- need to *modify* it at all. All we need to do is make a list of all the
- pages which can be freed just as soon as we've flushed the IOTLB and we
- know the hardware page-walk will no longer touch them.
- The 'pte' argument is the *parent* PTE, pointing to the page that is to
- be freed. */
-static void dma_pte_list_pagetables(struct dmar_domain *domain,
- int level, struct dma_pte *parent_pte,
- struct iommu_pages_list *freelist)
-{
- struct dma_pte *pte = phys_to_virt(dma_pte_addr(parent_pte));
-
- iommu_pages_list_add(freelist, pte);
-
- if (level == 1)
- return;
-
- do {
- if (dma_pte_present(pte) && !dma_pte_superpage(pte))
- dma_pte_list_pagetables(domain, level - 1, pte, freelist);
- pte++;
- } while (!first_pte_in_page(pte));
-}
-
-static void dma_pte_clear_level(struct dmar_domain *domain, int level,
- struct dma_pte *pte, unsigned long pfn,
- unsigned long start_pfn, unsigned long last_pfn,
- struct iommu_pages_list *freelist)
-{
- struct dma_pte *first_pte = NULL, *last_pte = NULL;
-
- pfn = max(start_pfn, pfn);
- pte = &pte[pfn_level_offset(pfn, level)];
-
- do {
- unsigned long level_pfn = pfn & level_mask(level);
-
- if (!dma_pte_present(pte))
- goto next;
-
- /* If range covers entire pagetable, free it */
- if (start_pfn <= level_pfn &&
- last_pfn >= level_pfn + level_size(level) - 1) {
- /* These subordinate page tables are going away entirely. Don't
- bother to clear them; we're just going to *free* them. */
- if (level > 1 && !dma_pte_superpage(pte))
- dma_pte_list_pagetables(domain, level - 1, pte, freelist);
-
- dma_clear_pte(pte);
- if (!first_pte)
- first_pte = pte;
- last_pte = pte;
- } else if (level > 1) {
- /* Recurse down into a level that isn't *entirely* obsolete */
- dma_pte_clear_level(domain, level - 1,
- phys_to_virt(dma_pte_addr(pte)),
- level_pfn, start_pfn, last_pfn,
- freelist);
- }
-next:
- pfn = level_pfn + level_size(level);
- } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
-
- if (first_pte)
- domain_flush_cache(domain, first_pte,
- (void *)++last_pte - (void *)first_pte);
-}
-
-/* We can't just free the pages because the IOMMU may still be walking
- the page tables, and may have cached the intermediate levels. The
- pages can only be freed after the IOTLB flush has been done. */
-static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
- unsigned long last_pfn,
- struct iommu_pages_list *freelist)
-{
- if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
- WARN_ON(start_pfn > last_pfn))
- return;
-
- /* we don't need lock here; nobody else touches the iova range */
- dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
- domain->pgd, 0, start_pfn, last_pfn, freelist);
-
- /* free pgd */
- if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
- iommu_pages_list_add(freelist, domain->pgd);
- domain->pgd = NULL;
- }
-}
-
/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
@@ -1460,13 +1147,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
domain_lookup_dev_info(domain, iommu, bus, devfn);
u16 did = domain_id_iommu(domain, iommu);
int translation = CONTEXT_TT_MULTI_LEVEL;
- struct dma_pte *pgd = domain->pgd;
+ struct pt_iommu_vtdss_hw_info pt_info;
struct context_entry *context;
int ret;
if (WARN_ON(!intel_domain_is_ss_paging(domain)))
return -EINVAL;
+ pt_iommu_vtdss_hw_info(&domain->sspt, &pt_info);
+
pr_debug("Set context mapping for %02x:%02x.%d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1489,8 +1178,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
else
translation = CONTEXT_TT_MULTI_LEVEL;
- context_set_address_root(context, virt_to_phys(pgd));
- context_set_address_width(context, domain->agaw);
+ context_set_address_root(context, pt_info.ssptptr);
+ context_set_address_width(context, pt_info.aw);
context_set_translation_type(context, translation);
context_set_fault_enable(context);
context_set_present(context);
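/*
 * Annotation, not part of the patch: the contract of
 * pt_iommu_vtdss_hw_info() as inferred from its use here and in
 * pasid.c below. It reports the physical address of the second-stage
 * table root (ssptptr) and the address width in SAGAW bit-index
 * encoding (apparently aw == levels - 2), replacing what the context
 * entry used to take from domain->pgd and domain->agaw.
 */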
@@ -1537,177 +1226,6 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev)
return 0;
}
-/* Return largest possible superpage level for a given mapping */
-static int hardware_largepage_caps(struct dmar_domain *domain, unsigned long iov_pfn,
- unsigned long phy_pfn, unsigned long pages)
-{
- int support, level = 1;
- unsigned long pfnmerge;
-
- support = domain->iommu_superpage;
-
- /* To use a large page, the virtual *and* physical addresses
- must be aligned to 2MiB/1GiB/etc. Lower bits set in either
- of them will mean we have to use smaller pages. So just
- merge them and check both at once. */
- pfnmerge = iov_pfn | phy_pfn;
-
- while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
- pages >>= VTD_STRIDE_SHIFT;
- if (!pages)
- break;
- pfnmerge >>= VTD_STRIDE_SHIFT;
- level++;
- support--;
- }
- return level;
-}
-
-/*
- * Ensure that old small page tables are removed to make room for superpage(s).
- * We're going to add new large pages, so make sure we don't remove their parent
- * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
- */
-static void switch_to_super_page(struct dmar_domain *domain,
- unsigned long start_pfn,
- unsigned long end_pfn, int level)
-{
- unsigned long lvl_pages = lvl_to_nr_pages(level);
- struct dma_pte *pte = NULL;
-
- if (WARN_ON(!IS_ALIGNED(start_pfn, lvl_pages) ||
- !IS_ALIGNED(end_pfn + 1, lvl_pages)))
- return;
-
- while (start_pfn <= end_pfn) {
- if (!pte)
- pte = pfn_to_dma_pte(domain, start_pfn, &level,
- GFP_ATOMIC);
-
- if (dma_pte_present(pte)) {
- dma_pte_free_pagetable(domain, start_pfn,
- start_pfn + lvl_pages - 1,
- level + 1);
-
- cache_tag_flush_range(domain, start_pfn << VTD_PAGE_SHIFT,
- end_pfn << VTD_PAGE_SHIFT, 0);
- }
-
- pte++;
- start_pfn += lvl_pages;
- if (first_pte_in_page(pte))
- pte = NULL;
- }
-}
-
-static int
-__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
- unsigned long phys_pfn, unsigned long nr_pages, int prot,
- gfp_t gfp)
-{
- struct dma_pte *first_pte = NULL, *pte = NULL;
- unsigned int largepage_lvl = 0;
- unsigned long lvl_pages = 0;
- phys_addr_t pteval;
- u64 attr;
-
- if (unlikely(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)))
- return -EINVAL;
-
- if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
- return -EINVAL;
-
- if (!(prot & DMA_PTE_WRITE) && domain->nested_parent) {
- pr_err_ratelimited("Read-only mapping is disallowed on the domain which serves as the parent in a nested configuration, due to HW errata (ERRATA_772415_SPR17)\n");
- return -EINVAL;
- }
-
- attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
- if (domain->use_first_level) {
- attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
- if (prot & DMA_PTE_WRITE)
- attr |= DMA_FL_PTE_DIRTY;
- }
-
- domain->has_mappings = true;
-
- pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
-
- while (nr_pages > 0) {
- uint64_t tmp;
-
- if (!pte) {
- largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
- phys_pfn, nr_pages);
-
- pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl,
- gfp);
- if (!pte)
- return -ENOMEM;
- first_pte = pte;
-
- lvl_pages = lvl_to_nr_pages(largepage_lvl);
-
- /* It is large page*/
- if (largepage_lvl > 1) {
- unsigned long end_pfn;
- unsigned long pages_to_remove;
-
- pteval |= DMA_PTE_LARGE_PAGE;
- pages_to_remove = min_t(unsigned long,
- round_down(nr_pages, lvl_pages),
- nr_pte_to_next_page(pte) * lvl_pages);
- end_pfn = iov_pfn + pages_to_remove - 1;
- switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
- } else {
- pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
- }
-
- }
- /* We don't need lock here, nobody else
- * touches the iova range
- */
- tmp = 0ULL;
- if (!try_cmpxchg64_local(&pte->val, &tmp, pteval)) {
- static int dumps = 5;
- pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
- iov_pfn, tmp, (unsigned long long)pteval);
- if (dumps) {
- dumps--;
- debug_dma_dump_mappings(NULL);
- }
- WARN_ON(1);
- }
-
- nr_pages -= lvl_pages;
- iov_pfn += lvl_pages;
- phys_pfn += lvl_pages;
- pteval += lvl_pages * VTD_PAGE_SIZE;
-
- /* If the next PTE would be the first in a new page, then we
- * need to flush the cache on the entries we've just written.
- * And then we'll need to recalculate 'pte', so clear it and
- * let it get set again in the if (!pte) block above.
- *
- * If we're done (!nr_pages) we need to flush the cache too.
- *
- * Also if we've been setting superpages, we may need to
- * recalculate 'pte' and switch back to smaller pages for the
- * end of the mapping, if the trailing size is not enough to
- * use another superpage (i.e. nr_pages < lvl_pages).
- */
- pte++;
- if (!nr_pages || first_pte_in_page(pte) ||
- (largepage_lvl > 1 && nr_pages < lvl_pages)) {
- domain_flush_cache(domain, first_pte,
- (void *)pte - (void *)first_pte);
- pte = NULL;
- }
- }
-
- return 0;
-}
-
static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
{
struct intel_iommu *iommu = info->iommu;
@@ -1769,22 +1287,26 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
struct device *dev,
u32 pasid, struct iommu_domain *old)
{
- struct dma_pte *pgd = domain->pgd;
- int level, flags = 0;
+ struct pt_iommu_x86_64_hw_info pt_info;
+ unsigned int flags = 0;
- level = agaw_to_level(domain->agaw);
- if (level != 4 && level != 5)
+ pt_iommu_x86_64_hw_info(&domain->fspt, &pt_info);
+ if (WARN_ON(pt_info.levels != 4 && pt_info.levels != 5))
return -EINVAL;
- if (level == 5)
+ if (pt_info.levels == 5)
flags |= PASID_FLAG_FL5LP;
if (domain->force_snooping)
flags |= PASID_FLAG_PAGE_SNOOP;
+ if (!(domain->fspt.x86_64_pt.common.features &
+ BIT(PT_FEAT_DMA_INCOHERENT)))
+ flags |= PASID_FLAG_PWSNP;
+
return __domain_setup_first_level(iommu, dev, pasid,
domain_id_iommu(domain, iommu),
- __pa(pgd), flags, old);
+ pt_info.gcr3_pt, flags, old);
}
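/*
 * Annotation, not part of the patch: the PASID-entry flags are now
 * derived from generic page-table state instead of driver fields:
 *   pt_info.levels == 5     -> PASID_FLAG_FL5LP (5-level first stage)
 *   domain->force_snooping  -> PASID_FLAG_PAGE_SNOOP (force PGSNP)
 *   coherent table walks    -> PASID_FLAG_PWSNP (no PT_FEAT_DMA_INCOHERENT)
 */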
static int dmar_domain_attach_device(struct dmar_domain *domain,
@@ -3230,7 +2752,8 @@ void device_block_translation(struct device *dev)
}
static int blocking_domain_attach_dev(struct iommu_domain *domain,
- struct device *dev)
+ struct device *dev,
+ struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
@@ -3251,23 +2774,9 @@ static struct iommu_domain blocking_domain = {
}
};
-static int iommu_superpage_capability(struct intel_iommu *iommu, bool first_stage)
-{
- if (!intel_iommu_superpage)
- return 0;
-
- if (first_stage)
- return cap_fl1gp_support(iommu->cap) ? 2 : 1;
-
- return fls(cap_super_page_val(iommu->cap));
-}
-
-static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_stage)
+static struct dmar_domain *paging_domain_alloc(void)
{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu = info->iommu;
struct dmar_domain *domain;
- int addr_width;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
@@ -3282,56 +2791,38 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st
INIT_LIST_HEAD(&domain->s1_domains);
spin_lock_init(&domain->s1_lock);
- domain->nid = dev_to_node(dev);
- domain->use_first_level = first_stage;
-
- domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
-
- /* calculate the address width */
- addr_width = agaw_to_width(iommu->agaw);
- if (addr_width > cap_mgaw(iommu->cap))
- addr_width = cap_mgaw(iommu->cap);
- domain->gaw = addr_width;
- domain->agaw = iommu->agaw;
- domain->max_addr = __DOMAIN_MAX_ADDR(addr_width);
-
- /* iommu memory access coherency */
- domain->iommu_coherency = iommu_paging_structure_coherency(iommu);
+ return domain;
+}
- /* pagesize bitmap */
- domain->domain.pgsize_bitmap = SZ_4K;
- domain->iommu_superpage = iommu_superpage_capability(iommu, first_stage);
- domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
+static unsigned int compute_vasz_lg2_fs(struct intel_iommu *iommu,
+ unsigned int *top_level)
+{
+ unsigned int mgaw = cap_mgaw(iommu->cap);
/*
- * IOVA aperture: First-level translation restricts the input-address
- * to a canonical address (i.e., address bits 63:N have the same value
- * as address bit [N-1], where N is 48-bits with 4-level paging and
- * 57-bits with 5-level paging). Hence, skip bit [N-1].
+ * Spec 3.6 First-Stage Translation:
+ *
+ * Software must limit addresses to less than the minimum of MGAW
+ * and the lower canonical address width implied by FSPM (i.e.,
+ * 47-bit when FSPM is 4-level and 56-bit when FSPM is 5-level).
*/
- domain->domain.geometry.force_aperture = true;
- domain->domain.geometry.aperture_start = 0;
- if (first_stage)
- domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
- else
- domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
-
- /* always allocate the top pgd */
- domain->pgd = iommu_alloc_pages_node_sz(domain->nid, GFP_KERNEL, SZ_4K);
- if (!domain->pgd) {
- kfree(domain);
- return ERR_PTR(-ENOMEM);
+ if (mgaw > 48 && cap_fl5lp_support(iommu->cap)) {
+ *top_level = 4;
+ return min(57, mgaw);
}
- domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
- return domain;
+ /* Four level is always supported */
+ *top_level = 3;
+ return min(48, mgaw);
}
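/*
 * Worked examples, not part of the patch:
 *   mgaw = 57, FL5LP supported: top_level = 4, vasz = 57 (5-level)
 *   mgaw = 52, FL5LP supported: top_level = 4, vasz = 52
 *   mgaw = 48, no FL5LP:        top_level = 3, vasz = 48 (4-level)
 * PT_FEAT_SIGN_EXTEND is expected to make the generic layer reserve the
 * top bit for canonical sign extension, so the usable aperture ends at
 * 2^(vasz - 1) - 1, matching the old __DOMAIN_MAX_ADDR(gaw - 1) rule
 * for first-stage domains.
 */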
static struct iommu_domain *
intel_iommu_domain_alloc_first_stage(struct device *dev,
struct intel_iommu *iommu, u32 flags)
{
+ struct pt_iommu_x86_64_cfg cfg = {};
struct dmar_domain *dmar_domain;
+ int ret;
if (flags & ~IOMMU_HWPT_ALLOC_PASID)
return ERR_PTR(-EOPNOTSUPP);
@@ -3340,10 +2831,20 @@ intel_iommu_domain_alloc_first_stage(struct device *dev,
if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
return ERR_PTR(-EOPNOTSUPP);
- dmar_domain = paging_domain_alloc(dev, true);
+ dmar_domain = paging_domain_alloc();
if (IS_ERR(dmar_domain))
return ERR_CAST(dmar_domain);
+ cfg.common.hw_max_vasz_lg2 =
+ compute_vasz_lg2_fs(iommu, &cfg.top_level);
+ cfg.common.hw_max_oasz_lg2 = 52;
+ cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
+ BIT(PT_FEAT_FLUSH_RANGE);
+ /* First stage always uses scalable mode */
+ if (!ecap_smpwc(iommu->ecap))
+ cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT);
+ dmar_domain->iommu.iommu_device = dev;
+ dmar_domain->iommu.nid = dev_to_node(dev);
dmar_domain->domain.ops = &intel_fs_paging_domain_ops;
/*
* iotlb sync for map is only needed for legacy implementations that
@@ -3353,14 +2854,58 @@ intel_iommu_domain_alloc_first_stage(struct device *dev,
if (rwbf_required(iommu))
dmar_domain->iotlb_sync_map = true;
+ ret = pt_iommu_x86_64_init(&dmar_domain->fspt, &cfg, GFP_KERNEL);
+ if (ret) {
+ kfree(dmar_domain);
+ return ERR_PTR(ret);
+ }
+
+ if (!cap_fl1gp_support(iommu->cap))
+ dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G;
+ if (!intel_iommu_superpage)
+ dmar_domain->domain.pgsize_bitmap = SZ_4K;
+
return &dmar_domain->domain;
}
+static unsigned int compute_vasz_lg2_ss(struct intel_iommu *iommu,
+ unsigned int *top_level)
+{
+ unsigned int sagaw = cap_sagaw(iommu->cap);
+ unsigned int mgaw = cap_mgaw(iommu->cap);
+
+ /*
+ * Find the largest table size that both the mgaw and sagaw support.
+ * This sets the valid range of IOVA and the top starting level.
+ * Some HW may only support a 4 or 5 level walk but must limit IOVA to
+ * 3 levels.
+ */
+ if (mgaw > 48 && sagaw >= BIT(3)) {
+ *top_level = 4;
+ return min(57, mgaw);
+ } else if (mgaw > 39 && sagaw >= BIT(2)) {
+ *top_level = 3 + ffs(sagaw >> 3);
+ return min(48, mgaw);
+ } else if (mgaw > 30 && sagaw >= BIT(1)) {
+ *top_level = 2 + ffs(sagaw >> 2);
+ return min(39, mgaw);
+ }
+ return 0;
+}
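/*
 * Worked examples, not part of the patch (ffs() is the kernel's 1-based
 * find-first-set, with ffs(0) == 0):
 *   sagaw = BIT(2), mgaw = 48:        top_level = 3, vasz = 48 (4-level)
 *   sagaw = BIT(3)|BIT(2), mgaw = 57: top_level = 4, vasz = 57 (5-level)
 *   sagaw = BIT(3) only, mgaw = 48:   top_level = 3 + ffs(1) = 4, vasz = 48
 *                                     (5-level walk forced, 48-bit IOVA)
 *   sagaw < BIT(1) or mgaw <= 30:     returns 0; domain allocation is
 *                                     then expected to fail
 */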
+
+static const struct iommu_dirty_ops intel_second_stage_dirty_ops = {
+ IOMMU_PT_DIRTY_OPS(vtdss),
+ .set_dirty_tracking = intel_iommu_set_dirty_tracking,
+};
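/*
 * Annotation, an assumption about the generic PT glue:
 * IOMMU_PT_DIRTY_OPS(vtdss) presumably expands to the generated
 * read_and_clear_dirty walker for the second-stage format, replacing
 * intel_iommu_read_and_clear_dirty() that is deleted further down;
 * only the hardware enable hook remains in the driver.
 */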
+
static struct iommu_domain *
intel_iommu_domain_alloc_second_stage(struct device *dev,
struct intel_iommu *iommu, u32 flags)
{
+ struct pt_iommu_vtdss_cfg cfg = {};
struct dmar_domain *dmar_domain;
+ unsigned int sslps;
+ int ret;
if (flags &
(~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
@@ -3377,15 +2922,46 @@ intel_iommu_domain_alloc_second_stage(struct device *dev,
if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
return ERR_PTR(-EOPNOTSUPP);
- dmar_domain = paging_domain_alloc(dev, false);
+ dmar_domain = paging_domain_alloc();
if (IS_ERR(dmar_domain))
return ERR_CAST(dmar_domain);
+ cfg.common.hw_max_vasz_lg2 = compute_vasz_lg2_ss(iommu, &cfg.top_level);
+ cfg.common.hw_max_oasz_lg2 = 52;
+ cfg.common.features = BIT(PT_FEAT_FLUSH_RANGE);
+
+ /*
+ * Read-only mapping is disallowed on the domain which serves as the
+ * parent in a nested configuration, due to HW errata
+ * (ERRATA_772415_SPR17)
+ */
+ if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)
+ cfg.common.features |= BIT(PT_FEAT_VTDSS_FORCE_WRITEABLE);
+
+ if (!iommu_paging_structure_coherency(iommu))
+ cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT);
+ dmar_domain->iommu.iommu_device = dev;
+ dmar_domain->iommu.nid = dev_to_node(dev);
dmar_domain->domain.ops = &intel_ss_paging_domain_ops;
dmar_domain->nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
- dmar_domain->domain.dirty_ops = &intel_dirty_ops;
+ dmar_domain->domain.dirty_ops = &intel_second_stage_dirty_ops;
+
+ ret = pt_iommu_vtdss_init(&dmar_domain->sspt, &cfg, GFP_KERNEL);
+ if (ret) {
+ kfree(dmar_domain);
+ return ERR_PTR(ret);
+ }
+
+ /* Adjust the supported page sizes to HW capability */
+ sslps = cap_super_page_val(iommu->cap);
+ if (!(sslps & BIT(0)))
+ dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_2M;
+ if (!(sslps & BIT(1)))
+ dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G;
+ if (!intel_iommu_superpage)
+ dmar_domain->domain.pgsize_bitmap = SZ_4K;
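	/*
	 * Example, not part of the patch: cap_super_page_val() returns
	 * the SSLPS capability field, where BIT(0) = 2MiB and
	 * BIT(1) = 1GiB second-stage superpages:
	 *   sslps = 0x3: pgsize_bitmap keeps SZ_4K | SZ_2M | SZ_1G
	 *   sslps = 0x1: SZ_1G masked off, leaving SZ_4K | SZ_2M
	 *   intel_iommu_superpage == 0: forced back to SZ_4K only
	 */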
/*
* Besides the internal write buffer flush, the caching mode used for
@@ -3427,14 +3003,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain)
if (WARN_ON(!list_empty(&dmar_domain->devices)))
return;
- if (dmar_domain->pgd) {
- struct iommu_pages_list freelist =
- IOMMU_PAGES_LIST_INIT(freelist);
-
- domain_unmap(dmar_domain, 0, DOMAIN_MAX_PFN(dmar_domain->gaw),
- &freelist);
- iommu_put_pages_list(&freelist);
- }
+ pt_iommu_deinit(&dmar_domain->iommu);
kfree(dmar_domain->qi_batch);
kfree(dmar_domain);
@@ -3451,6 +3020,16 @@ static int paging_domain_compatible_first_stage(struct dmar_domain *dmar_domain,
if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
return -EINVAL;
+ if (!ecap_smpwc(iommu->ecap) &&
+ !(dmar_domain->fspt.x86_64_pt.common.features &
+ BIT(PT_FEAT_DMA_INCOHERENT)))
+ return -EINVAL;
+
+ /* Supports the number of table levels */
+ if (!cap_fl5lp_support(iommu->cap) &&
+ dmar_domain->fspt.x86_64_pt.common.max_vasz_lg2 > 48)
+ return -EINVAL;
+
/* Same page size support */
if (!cap_fl1gp_support(iommu->cap) &&
(dmar_domain->domain.pgsize_bitmap & SZ_1G))
@@ -3467,7 +3046,11 @@ static int
paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain,
struct intel_iommu *iommu)
{
+ unsigned int vasz_lg2 = dmar_domain->sspt.vtdss_pt.common.max_vasz_lg2;
unsigned int sslps = cap_super_page_val(iommu->cap);
+ struct pt_iommu_vtdss_hw_info pt_info;
+
+ pt_iommu_vtdss_hw_info(&dmar_domain->sspt, &pt_info);
if (dmar_domain->domain.dirty_ops && !ssads_supported(iommu))
return -EINVAL;
@@ -3478,6 +3061,19 @@ paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain,
if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
return -EINVAL;
+ if (!iommu_paging_structure_coherency(iommu) &&
+ !(dmar_domain->sspt.vtdss_pt.common.features &
+ BIT(PT_FEAT_DMA_INCOHERENT)))
+ return -EINVAL;
+
+ /* Address width falls within the capability */
+ if (cap_mgaw(iommu->cap) < vasz_lg2)
+ return -EINVAL;
+
+ /* Page table level is supported. */
+ if (!(cap_sagaw(iommu->cap) & BIT(pt_info.aw)))
+ return -EINVAL;
+
/* Same page size support */
if (!(sslps & BIT(0)) && (dmar_domain->domain.pgsize_bitmap & SZ_2M))
return -EINVAL;
@@ -3489,6 +3085,14 @@ paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain,
!dmar_domain->iotlb_sync_map)
return -EINVAL;
+ /*
+ * FIXME this is locked wrong, it needs to be under the
+ * dmar_domain->lock
+ */
+ if ((dmar_domain->sspt.vtdss_pt.common.features &
+ BIT(PT_FEAT_VTDSS_FORCE_COHERENCE)) &&
+ !ecap_sc_support(iommu->ecap))
+ return -EINVAL;
return 0;
}
@@ -3498,7 +3102,6 @@ int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu = info->iommu;
int ret = -EINVAL;
- int addr_width;
if (intel_domain_is_fs_paging(dmar_domain))
ret = paging_domain_compatible_first_stage(dmar_domain, iommu);
@@ -3509,26 +3112,6 @@ int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
if (ret)
return ret;
- /*
- * FIXME this is locked wrong, it needs to be under the
- * dmar_domain->lock
- */
- if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
- return -EINVAL;
-
- if (dmar_domain->iommu_coherency !=
- iommu_paging_structure_coherency(iommu))
- return -EINVAL;
-
-
- /* check if this iommu agaw is sufficient for max mapped address */
- addr_width = agaw_to_width(iommu->agaw);
- if (addr_width > cap_mgaw(iommu->cap))
- addr_width = cap_mgaw(iommu->cap);
-
- if (dmar_domain->gaw > addr_width || dmar_domain->agaw > iommu->agaw)
- return -EINVAL;
-
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
context_copied(iommu, info->bus, info->devfn))
return intel_pasid_setup_sm_context(dev);
@@ -3537,7 +3120,8 @@ int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
}
static int intel_iommu_attach_device(struct iommu_domain *domain,
- struct device *dev)
+ struct device *dev,
+ struct iommu_domain *old)
{
int ret;
@@ -3558,110 +3142,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
return ret;
}
-static int intel_iommu_map(struct iommu_domain *domain,
- unsigned long iova, phys_addr_t hpa,
- size_t size, int iommu_prot, gfp_t gfp)
-{
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- u64 max_addr;
- int prot = 0;
-
- if (iommu_prot & IOMMU_READ)
- prot |= DMA_PTE_READ;
- if (iommu_prot & IOMMU_WRITE)
- prot |= DMA_PTE_WRITE;
- if (dmar_domain->set_pte_snp)
- prot |= DMA_PTE_SNP;
-
- max_addr = iova + size;
- if (dmar_domain->max_addr < max_addr) {
- u64 end;
-
- /* check if minimum agaw is sufficient for mapped address */
- end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
- if (end < max_addr) {
- pr_err("%s: iommu width (%d) is not "
- "sufficient for the mapped address (%llx)\n",
- __func__, dmar_domain->gaw, max_addr);
- return -EFAULT;
- }
- dmar_domain->max_addr = max_addr;
- }
- /* Round up size to next multiple of PAGE_SIZE, if it and
- the low bits of hpa would take us onto the next page */
- size = aligned_nrpages(hpa, size);
- return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
- hpa >> VTD_PAGE_SHIFT, size, prot, gfp);
-}
-
-static int intel_iommu_map_pages(struct iommu_domain *domain,
- unsigned long iova, phys_addr_t paddr,
- size_t pgsize, size_t pgcount,
- int prot, gfp_t gfp, size_t *mapped)
-{
- unsigned long pgshift = __ffs(pgsize);
- size_t size = pgcount << pgshift;
- int ret;
-
- if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
- return -EINVAL;
-
- if (!IS_ALIGNED(iova | paddr, pgsize))
- return -EINVAL;
-
- ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
- if (!ret && mapped)
- *mapped = size;
-
- return ret;
-}
-
-static size_t intel_iommu_unmap(struct iommu_domain *domain,
- unsigned long iova, size_t size,
- struct iommu_iotlb_gather *gather)
-{
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- unsigned long start_pfn, last_pfn;
- int level = 0;
-
- /* Cope with horrid API which requires us to unmap more than the
- size argument if it happens to be a large-page mapping. */
- if (unlikely(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
- &level, GFP_ATOMIC)))
- return 0;
-
- if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
- size = VTD_PAGE_SIZE << level_to_offset_bits(level);
-
- start_pfn = iova >> VTD_PAGE_SHIFT;
- last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
-
- domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist);
-
- if (dmar_domain->max_addr == iova + size)
- dmar_domain->max_addr = iova;
-
- /*
- * We do not use page-selective IOTLB invalidation in flush queue,
- * so there is no need to track page and sync iotlb.
- */
- if (!iommu_iotlb_gather_queued(gather))
- iommu_iotlb_gather_add_page(domain, gather, iova, size);
-
- return size;
-}
-
-static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
- unsigned long iova,
- size_t pgsize, size_t pgcount,
- struct iommu_iotlb_gather *gather)
-{
- unsigned long pgshift = __ffs(pgsize);
- size_t size = pgcount << pgshift;
-
- return intel_iommu_unmap(domain, iova, size, gather);
-}
-
static void intel_iommu_tlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
@@ -3671,24 +3151,6 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
iommu_put_pages_list(&gather->freelist);
}
-static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
- dma_addr_t iova)
-{
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- struct dma_pte *pte;
- int level = 0;
- u64 phys = 0;
-
- pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
- GFP_ATOMIC);
- if (pte && dma_pte_present(pte))
- phys = dma_pte_addr(pte) +
- (iova & (BIT_MASK(level_to_offset_bits(level) +
- VTD_PAGE_SHIFT) - 1));
-
- return phys;
-}
-
static bool domain_support_force_snooping(struct dmar_domain *domain)
{
struct device_domain_info *info;
@@ -3730,15 +3192,15 @@ static bool intel_iommu_enforce_cache_coherency_ss(struct iommu_domain *domain)
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
guard(spinlock_irqsave)(&dmar_domain->lock);
- if (!domain_support_force_snooping(dmar_domain) ||
- dmar_domain->has_mappings)
+ if (!domain_support_force_snooping(dmar_domain))
return false;
/*
* Second level page table supports per-PTE snoop control. The
* iommu_map() interface will handle this by setting SNP bit.
*/
- dmar_domain->set_pte_snp = true;
+ dmar_domain->sspt.vtdss_pt.common.features |=
+ BIT(PT_FEAT_VTDSS_FORCE_COHERENCE);
dmar_domain->force_snooping = true;
return true;
}
@@ -4302,49 +3764,6 @@ err_unwind:
return ret;
}
-static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
- unsigned long iova, size_t size,
- unsigned long flags,
- struct iommu_dirty_bitmap *dirty)
-{
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- unsigned long end = iova + size - 1;
- unsigned long pgsize;
-
- /*
- * IOMMUFD core calls into a dirty tracking disabled domain without an
- * IOVA bitmap set in order to clean dirty bits in all PTEs that might
- * have occurred when we stopped dirty tracking. This ensures that we
- * never inherit dirtied bits from a previous cycle.
- */
- if (!dmar_domain->dirty_tracking && dirty->bitmap)
- return -EINVAL;
-
- do {
- struct dma_pte *pte;
- int lvl = 0;
-
- pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl,
- GFP_ATOMIC);
- pgsize = level_size(lvl) << VTD_PAGE_SHIFT;
- if (!pte || !dma_pte_present(pte)) {
- iova += pgsize;
- continue;
- }
-
- if (dma_sl_pte_test_and_clear_dirty(pte, flags))
- iommu_dirty_bitmap_record(dirty, iova, pgsize);
- iova += pgsize;
- } while (iova < end);
-
- return 0;
-}
-
-static const struct iommu_dirty_ops intel_dirty_ops = {
- .set_dirty_tracking = intel_iommu_set_dirty_tracking,
- .read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
-};
-
static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
@@ -4401,7 +3820,9 @@ static int device_setup_pass_through(struct device *dev)
context_setup_pass_through_cb, dev);
}
-static int identity_domain_attach_dev(struct iommu_domain *domain, struct device *dev)
+static int identity_domain_attach_dev(struct iommu_domain *domain,
+ struct device *dev,
+ struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
@@ -4462,27 +3883,23 @@ static struct iommu_domain identity_domain = {
};
const struct iommu_domain_ops intel_fs_paging_domain_ops = {
+ IOMMU_PT_DOMAIN_OPS(x86_64),
.attach_dev = intel_iommu_attach_device,
.set_dev_pasid = intel_iommu_set_dev_pasid,
- .map_pages = intel_iommu_map_pages,
- .unmap_pages = intel_iommu_unmap_pages,
.iotlb_sync_map = intel_iommu_iotlb_sync_map,
.flush_iotlb_all = intel_flush_iotlb_all,
.iotlb_sync = intel_iommu_tlb_sync,
- .iova_to_phys = intel_iommu_iova_to_phys,
.free = intel_iommu_domain_free,
.enforce_cache_coherency = intel_iommu_enforce_cache_coherency_fs,
};
const struct iommu_domain_ops intel_ss_paging_domain_ops = {
+ IOMMU_PT_DOMAIN_OPS(vtdss),
.attach_dev = intel_iommu_attach_device,
.set_dev_pasid = intel_iommu_set_dev_pasid,
- .map_pages = intel_iommu_map_pages,
- .unmap_pages = intel_iommu_unmap_pages,
.iotlb_sync_map = intel_iommu_iotlb_sync_map,
.flush_iotlb_all = intel_flush_iotlb_all,
.iotlb_sync = intel_iommu_tlb_sync,
- .iova_to_phys = intel_iommu_iova_to_phys,
.free = intel_iommu_domain_free,
.enforce_cache_coherency = intel_iommu_enforce_cache_coherency_ss,
};
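/*
 * Annotation, an assumption about the generic PT glue:
 * IOMMU_PT_DOMAIN_OPS(fmt) presumably fills in the generated
 * .map_pages, .unmap_pages and .iova_to_phys for the named format,
 * which is why those callbacks are dropped from both paging ops tables
 * in this hunk.
 */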
@@ -4797,3 +4214,5 @@ err:
return ret;
}
+
+MODULE_IMPORT_NS("GENERIC_PT_IOMMU");
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 3056583d7f56..25c5e22096d4 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -23,8 +23,8 @@
#include <linux/xarray.h>
#include <linux/perf_event.h>
#include <linux/pci.h>
+#include <linux/generic_pt/iommu.h>
-#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include <uapi/linux/iommufd.h>
@@ -595,22 +595,20 @@ struct qi_batch {
};
struct dmar_domain {
- int nid; /* node id */
+ union {
+ struct iommu_domain domain;
+ struct pt_iommu iommu;
+ /* First stage page table */
+ struct pt_iommu_x86_64 fspt;
+ /* Second stage page table */
+ struct pt_iommu_vtdss sspt;
+ };
+
struct xarray iommu_array; /* Attached IOMMU array */
- u8 iommu_coherency: 1; /* indicate coherency of iommu access */
- u8 force_snooping : 1; /* Create IOPTEs with snoop control */
- u8 set_pte_snp:1;
- u8 use_first_level:1; /* DMA translation for the domain goes
- * through the first level page table,
- * otherwise, goes through the second
- * level.
- */
+ u8 force_snooping:1; /* Create PASID entry with snoop control */
u8 dirty_tracking:1; /* Dirty tracking is enabled */
u8 nested_parent:1; /* Has other domains nested on it */
- u8 has_mappings:1; /* Has mappings configured through
- * iommu_map() interface.
- */
u8 iotlb_sync_map:1; /* Need to flush IOTLB cache or write
* buffer when creating mappings.
*/
@@ -623,26 +621,9 @@ struct dmar_domain {
struct list_head cache_tags; /* Cache tag list */
struct qi_batch *qi_batch; /* Batched QI descriptors */
- int iommu_superpage;/* Level of superpages supported:
- 0 == 4KiB (no superpages), 1 == 2MiB,
- 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
union {
/* DMA remapping domain */
struct {
- /* virtual address */
- struct dma_pte *pgd;
- /* max guest address width */
- int gaw;
- /*
- * adjusted guest address width:
- * 0: level 2 30-bit
- * 1: level 3 39-bit
- * 2: level 4 48-bit
- * 3: level 5 57-bit
- */
- int agaw;
- /* maximum mapped address */
- u64 max_addr;
/* Protect the s1_domains list */
spinlock_t s1_lock;
/* Track s1_domains nested on this domain */
@@ -664,10 +645,10 @@ struct dmar_domain {
struct mmu_notifier notifier;
};
};
-
- struct iommu_domain domain; /* generic domain data structure for
- iommu core */
};
+PT_IOMMU_CHECK_DOMAIN(struct dmar_domain, iommu, domain);
+PT_IOMMU_CHECK_DOMAIN(struct dmar_domain, sspt.iommu, domain);
+PT_IOMMU_CHECK_DOMAIN(struct dmar_domain, fspt.iommu, domain);
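/*
 * Annotation, not part of the patch: the union overlays the generic
 * iommu_domain, the common pt_iommu, and both table flavours at offset 0
 * of struct dmar_domain, so container_of() conversions stay valid no
 * matter which member is live. A minimal sketch of the invariant the
 * PT_IOMMU_CHECK_DOMAIN() asserts presumably enforce:
 *
 *	struct dmar_domain *dd = to_dmar_domain(domain);
 *	// &dd->domain, &dd->iommu, &dd->fspt and &dd->sspt all alias
 *	// the same storage; the checks catch any layout drift.
 */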
/*
* In theory, the VT-d 4.0 spec can support up to 2 ^ 16 counters.
@@ -866,11 +847,6 @@ struct dma_pte {
u64 val;
};
-static inline void dma_clear_pte(struct dma_pte *pte)
-{
- pte->val = 0;
-}
-
static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
@@ -886,32 +862,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
return (pte->val & 3) != 0;
}
-static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte,
- unsigned long flags)
-{
- if (flags & IOMMU_DIRTY_NO_CLEAR)
- return (pte->val & DMA_SL_PTE_DIRTY) != 0;
-
- return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT,
- (unsigned long *)&pte->val);
-}
-
static inline bool dma_pte_superpage(struct dma_pte *pte)
{
return (pte->val & DMA_PTE_LARGE_PAGE);
}
-static inline bool first_pte_in_page(struct dma_pte *pte)
-{
- return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE);
-}
-
-static inline int nr_pte_to_next_page(struct dma_pte *pte)
-{
- return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) :
- (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
-}
-
static inline bool context_present(struct context_entry *context)
{
return (context->lo & 1);
@@ -927,11 +882,6 @@ static inline int agaw_to_level(int agaw)
return agaw + 2;
}
-static inline int agaw_to_width(int agaw)
-{
- return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
-}
-
static inline int width_to_agaw(int width)
{
return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
@@ -947,25 +897,6 @@ static inline int pfn_level_offset(u64 pfn, int level)
return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}
-static inline u64 level_mask(int level)
-{
- return -1ULL << level_to_offset_bits(level);
-}
-
-static inline u64 level_size(int level)
-{
- return 1ULL << level_to_offset_bits(level);
-}
-
-static inline u64 align_to_level(u64 pfn, int level)
-{
- return (pfn + level_size(level) - 1) & level_mask(level);
-}
-
-static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
-{
- return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
-}
static inline void context_set_present(struct context_entry *context)
{
@@ -1097,7 +1028,7 @@ static inline void qi_desc_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
struct qi_desc *desc)
{
u8 dw = 0, dr = 0;
- int ih = 0;
+ int ih = addr & 1;
if (cap_write_drain(iommu->cap))
dw = 1;
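/*
 * Annotation, inferred from this hunk: callers now encode the
 * invalidation hint in bit 0 of the page-aligned address. IH = 1 tells
 * the hardware that the paging structures were not modified, so
 * paging-structure-cache entries for the range may be retained.
 */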
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index 1b6ad9c900a5..a3fb8c193ca6 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -19,7 +19,7 @@
#include "pasid.h"
static int intel_nested_attach_dev(struct iommu_domain *domain,
- struct device *dev)
+ struct device *dev, struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
@@ -29,11 +29,6 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
device_block_translation(dev);
- if (iommu->agaw < dmar_domain->s2_domain->agaw) {
- dev_err_ratelimited(dev, "Adjusted guest address width not compatible\n");
- return -ENODEV;
- }
-
/*
* Stage-1 domain cannot work alone, it is nested on a s2_domain.
* The s2_domain will be used in nested translation, hence needs
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 52f678975da7..3e2255057079 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -366,7 +366,7 @@ static void pasid_pte_config_first_level(struct intel_iommu *iommu,
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, iommu->agaw);
- pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+ pasid_set_page_snoop(pte, flags & PASID_FLAG_PWSNP);
/* Setup Present and PASID Granular Transfer Type: */
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
@@ -461,19 +461,22 @@ int intel_pasid_replace_first_level(struct intel_iommu *iommu,
*/
static void pasid_pte_config_second_level(struct intel_iommu *iommu,
struct pasid_entry *pte,
- u64 pgd_val, int agaw, u16 did,
- bool dirty_tracking)
+ struct dmar_domain *domain, u16 did)
{
+ struct pt_iommu_vtdss_hw_info pt_info;
+
lockdep_assert_held(&iommu->lock);
+ pt_iommu_vtdss_hw_info(&domain->sspt, &pt_info);
pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);
- pasid_set_slptr(pte, pgd_val);
- pasid_set_address_width(pte, agaw);
+ pasid_set_slptr(pte, pt_info.ssptptr);
+ pasid_set_address_width(pte, pt_info.aw);
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
pasid_set_fault_enable(pte);
- pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
- if (dirty_tracking)
+ pasid_set_page_snoop(pte, !(domain->sspt.vtdss_pt.common.features &
+ BIT(PT_FEAT_DMA_INCOHERENT)));
+ if (domain->dirty_tracking)
pasid_set_ssade(pte);
pasid_set_present(pte);
@@ -484,10 +487,9 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
struct device *dev, u32 pasid)
{
struct pasid_entry *pte;
- struct dma_pte *pgd;
- u64 pgd_val;
u16 did;
+
/*
* If hardware advertises no support for second level
* translation, return directly.
@@ -498,8 +500,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
return -EINVAL;
}
- pgd = domain->pgd;
- pgd_val = virt_to_phys(pgd);
did = domain_id_iommu(domain, iommu);
spin_lock(&iommu->lock);
@@ -514,8 +514,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
return -EBUSY;
}
- pasid_pte_config_second_level(iommu, pte, pgd_val, domain->agaw,
- did, domain->dirty_tracking);
+ pasid_pte_config_second_level(iommu, pte, domain, did);
spin_unlock(&iommu->lock);
pasid_flush_caches(iommu, pte, pasid, did);
@@ -529,8 +528,6 @@ int intel_pasid_replace_second_level(struct intel_iommu *iommu,
u32 pasid)
{
struct pasid_entry *pte, new_pte;
- struct dma_pte *pgd;
- u64 pgd_val;
u16 did;
/*
@@ -543,13 +540,9 @@ int intel_pasid_replace_second_level(struct intel_iommu *iommu,
return -EINVAL;
}
- pgd = domain->pgd;
- pgd_val = virt_to_phys(pgd);
did = domain_id_iommu(domain, iommu);
- pasid_pte_config_second_level(iommu, &new_pte, pgd_val,
- domain->agaw, did,
- domain->dirty_tracking);
+ pasid_pte_config_second_level(iommu, &new_pte, domain, did);
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
@@ -747,10 +740,12 @@ static void pasid_pte_config_nestd(struct intel_iommu *iommu,
struct dmar_domain *s2_domain,
u16 did)
{
- struct dma_pte *pgd = s2_domain->pgd;
+ struct pt_iommu_vtdss_hw_info pt_info;
lockdep_assert_held(&iommu->lock);
+ pt_iommu_vtdss_hw_info(&s2_domain->sspt, &pt_info);
+
pasid_clear_entry(pte);
if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
@@ -770,11 +765,12 @@ static void pasid_pte_config_nestd(struct intel_iommu *iommu,
if (s2_domain->force_snooping)
pasid_set_pgsnp(pte);
- pasid_set_slptr(pte, virt_to_phys(pgd));
+ pasid_set_slptr(pte, pt_info.ssptptr);
pasid_set_fault_enable(pte);
pasid_set_domain_id(pte, did);
- pasid_set_address_width(pte, s2_domain->agaw);
- pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+ pasid_set_address_width(pte, pt_info.aw);
+ pasid_set_page_snoop(pte, !(s2_domain->sspt.vtdss_pt.common.features &
+ BIT(PT_FEAT_DMA_INCOHERENT)));
if (s2_domain->dirty_tracking)
pasid_set_ssade(pte);
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index a771a77d4239..b4c85242dc79 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -24,6 +24,7 @@
#define PASID_FLAG_NESTED BIT(1)
#define PASID_FLAG_PAGE_SNOOP BIT(2)
+#define PASID_FLAG_PWSNP BIT(3)
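/* Annotation: PWSNP makes the PASID entry request snooped (coherent)
 * page-walk accesses; it is set when the table was built without
 * PT_FEAT_DMA_INCOHERENT, mirroring the old ecap_smpwc() test. */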
/*
* The PASID_FLAG_FL5LP flag Indicates using 5-level paging for first-
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index e147f71f91b7..71de7947971f 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -170,6 +170,7 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
/* Setup the pasid table: */
sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
+ sflags |= PASID_FLAG_PWSNP;
ret = __domain_setup_first_level(iommu, dev, pasid,
FLPT_DEFAULT_DID, __pa(mm->pgd),
sflags, old);