Diffstat (limited to 'drivers/iommu/amd')
 -rw-r--r--  drivers/iommu/amd/Kconfig            |    5
 -rw-r--r--  drivers/iommu/amd/Makefile           |    2
 -rw-r--r--  drivers/iommu/amd/amd_iommu.h        |    1
 -rw-r--r--  drivers/iommu/amd/amd_iommu_types.h  |  114
 -rw-r--r--  drivers/iommu/amd/debugfs.c          |    2
 -rw-r--r--  drivers/iommu/amd/init.c             |   15
 -rw-r--r--  drivers/iommu/amd/io_pgtable.c       |  577
 -rw-r--r--  drivers/iommu/amd/io_pgtable_v2.c    |  370
 -rw-r--r--  drivers/iommu/amd/iommu.c            |  572
9 files changed, 328 insertions, 1330 deletions
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index ecef69c11144..f2acf471cb5d 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -11,10 +11,13 @@ config AMD_IOMMU
select MMU_NOTIFIER
select IOMMU_API
select IOMMU_IOVA
- select IOMMU_IO_PGTABLE
select IOMMU_SVA
select IOMMU_IOPF
select IOMMUFD_DRIVER if IOMMUFD
+ select GENERIC_PT
+ select IOMMU_PT
+ select IOMMU_PT_AMDV1
+ select IOMMU_PT_X86_64
depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
help
With this option you can enable support for AMD IOMMU hardware in
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index 59c04a67f398..5412a563c697 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o
+obj-y += iommu.o init.o quirks.o ppr.o pasid.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 9b4b589a54b5..25044d28f28a 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -88,7 +88,6 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag);
* the IOMMU used by this driver.
*/
void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
-void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
void amd_iommu_domain_flush_pages(struct protection_domain *domain,
u64 address, size_t size);
void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index a698a2e7ce2a..78b1c44bd6b5 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -18,7 +18,7 @@
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/irqreturn.h>
-#include <linux/io-pgtable.h>
+#include <linux/generic_pt/iommu.h>
/*
* Maximum number of IOMMUs supported
@@ -247,6 +247,10 @@
#define CMD_BUFFER_ENTRIES 512
#define MMIO_CMD_SIZE_SHIFT 56
#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
+#define MMIO_CMD_HEAD_MASK GENMASK_ULL(18, 4) /* Command buffer head ptr field [18:4] */
+#define MMIO_CMD_BUFFER_HEAD(x) FIELD_GET(MMIO_CMD_HEAD_MASK, (x))
+#define MMIO_CMD_TAIL_MASK GENMASK_ULL(18, 4) /* Command buffer tail ptr field [18:4] */
+#define MMIO_CMD_BUFFER_TAIL(x) FIELD_GET(MMIO_CMD_TAIL_MASK, (x))
/* constants for event buffer handling */
#define EVT_BUFFER_SIZE 8192 /* 512 entries */
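
The four MMIO_CMD_* additions above replace open-coded mask-and-shift arithmetic with GENMASK_ULL()/FIELD_GET(), naming the head/tail pointer field in bits [18:4] of the command-buffer head/tail registers. A minimal sketch of the pattern follows (illustration only, not part of the patch; EXAMPLE_CMD_HEAD_MASK and example_cmd_head() are invented names):

    #include <linux/bitfield.h>
    #include <linux/bits.h>
    #include <linux/types.h>

    /* The head pointer lives in bits [18:4] of the register value. */
    #define EXAMPLE_CMD_HEAD_MASK	GENMASK_ULL(18, 4)

    static inline u64 example_cmd_head(u64 reg)
    {
    	/* FIELD_GET() masks and shifts: (reg & EXAMPLE_CMD_HEAD_MASK) >> 4 */
    	return FIELD_GET(EXAMPLE_CMD_HEAD_MASK, reg);
    }

dump_command_buffer() in the iommu.c hunks below reads MMIO_CMD_HEAD_OFFSET/MMIO_CMD_TAIL_OFFSET and decodes the raw values with MMIO_CMD_BUFFER_HEAD()/MMIO_CMD_BUFFER_TAIL() in exactly this way.
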
@@ -337,76 +341,7 @@
#define GUEST_PGTABLE_4_LEVEL 0x00
#define GUEST_PGTABLE_5_LEVEL 0x01
-#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9))
-#define PM_LEVEL_SIZE(x) (((x) < 6) ? \
- ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \
- (0xffffffffffffffffULL))
-#define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
-#define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL)
-#define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \
- IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW)
-#define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL)
-
-#define PM_MAP_4k 0
#define PM_ADDR_MASK 0x000ffffffffff000ULL
-#define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \
- (~((1ULL << (12 + ((lvl) * 9))) - 1)))
-#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr))
-
-/*
- * Returns the page table level to use for a given page size
- * Pagesize is expected to be a power-of-two
- */
-#define PAGE_SIZE_LEVEL(pagesize) \
- ((__ffs(pagesize) - 12) / 9)
-/*
- * Returns the number of ptes to use for a given page size
- * Pagesize is expected to be a power-of-two
- */
-#define PAGE_SIZE_PTE_COUNT(pagesize) \
- (1ULL << ((__ffs(pagesize) - 12) % 9))
-
-/*
- * Aligns a given io-virtual address to a given page size
- * Pagesize is expected to be a power-of-two
- */
-#define PAGE_SIZE_ALIGN(address, pagesize) \
- ((address) & ~((pagesize) - 1))
-/*
- * Creates an IOMMU PTE for an address and a given pagesize
- * The PTE has no permission bits set
- * Pagesize is expected to be a power-of-two larger than 4096
- */
-#define PAGE_SIZE_PTE(address, pagesize) \
- (((address) | ((pagesize) - 1)) & \
- (~(pagesize >> 1)) & PM_ADDR_MASK)
-
-/*
- * Takes a PTE value with mode=0x07 and returns the page size it maps
- */
-#define PTE_PAGE_SIZE(pte) \
- (1ULL << (1 + ffz(((pte) | 0xfffULL))))
-
-/*
- * Takes a page-table level and returns the default page-size for this level
- */
-#define PTE_LEVEL_PAGE_SIZE(level) \
- (1ULL << (12 + (9 * (level))))
-
-/*
- * The IOPTE dirty bit
- */
-#define IOMMU_PTE_HD_BIT (6)
-
-/*
- * Bit value definition for I/O PTE fields
- */
-#define IOMMU_PTE_PR BIT_ULL(0)
-#define IOMMU_PTE_HD BIT_ULL(IOMMU_PTE_HD_BIT)
-#define IOMMU_PTE_U BIT_ULL(59)
-#define IOMMU_PTE_FC BIT_ULL(60)
-#define IOMMU_PTE_IR BIT_ULL(61)
-#define IOMMU_PTE_IW BIT_ULL(62)
/*
* Bit value definition for DTE fields
@@ -436,12 +371,6 @@
/* DTE[128:179] | DTE[184:191] */
#define DTE_DATA2_INTR_MASK ~GENMASK_ULL(55, 52)
-#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
-#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
-#define IOMMU_PTE_DIRTY(pte) ((pte) & IOMMU_PTE_HD)
-#define IOMMU_PTE_PAGE(pte) (iommu_phys_to_virt((pte) & IOMMU_PAGE_MASK))
-#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
-
#define IOMMU_PROT_MASK 0x03
#define IOMMU_PROT_IR 0x01
#define IOMMU_PROT_IW 0x02
@@ -534,19 +463,6 @@ struct amd_irte_ops;
#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0)
-#define io_pgtable_to_data(x) \
- container_of((x), struct amd_io_pgtable, pgtbl)
-
-#define io_pgtable_ops_to_data(x) \
- io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
-
-#define io_pgtable_ops_to_domain(x) \
- container_of(io_pgtable_ops_to_data(x), \
- struct protection_domain, iop)
-
-#define io_pgtable_cfg_to_data(x) \
- container_of((x), struct amd_io_pgtable, pgtbl.cfg)
-
struct gcr3_tbl_info {
u64 *gcr3_tbl; /* Guest CR3 table */
int glx; /* Number of levels for GCR3 table */
@@ -554,14 +470,6 @@ struct gcr3_tbl_info {
u16 domid; /* Per device domain ID */
};
-struct amd_io_pgtable {
- seqcount_t seqcount; /* Protects root/mode update */
- struct io_pgtable pgtbl;
- int mode;
- u64 *root;
- u64 *pgd; /* v2 pgtable pgd pointer */
-};
-
enum protection_domain_mode {
PD_MODE_NONE,
PD_MODE_V1,
@@ -589,10 +497,13 @@ struct pdom_iommu_info {
* independent of their use.
*/
struct protection_domain {
+ union {
+ struct iommu_domain domain;
+ struct pt_iommu iommu;
+ struct pt_iommu_amdv1 amdv1;
+ struct pt_iommu_x86_64 amdv2;
+ };
struct list_head dev_list; /* List of all devices in this domain */
- struct iommu_domain domain; /* generic domain handle used by
- iommu core code */
- struct amd_io_pgtable iop;
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
enum protection_domain_mode pd_mode; /* Track page table type */
@@ -602,6 +513,9 @@ struct protection_domain {
struct mmu_notifier mn; /* mmu notifier for the SVA domain */
struct list_head dev_data_list; /* List of pdom_dev_data */
};
+PT_IOMMU_CHECK_DOMAIN(struct protection_domain, iommu, domain);
+PT_IOMMU_CHECK_DOMAIN(struct protection_domain, amdv1.iommu, domain);
+PT_IOMMU_CHECK_DOMAIN(struct protection_domain, amdv2.iommu, domain);
/*
* This structure contains information about one PCI segment in the system.
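
The union together with the PT_IOMMU_CHECK_DOMAIN() lines is the heart of the conversion: struct protection_domain no longer carries its own struct amd_io_pgtable, it overlays the generic-PT table objects on the embedded struct iommu_domain. A small stand-alone sketch of why this layout is safe, assuming PT_IOMMU_CHECK_DOMAIN() expands to compile-time offset assertions of roughly this form (the stand-in struct members below are invented for illustration and are not the kernel definitions):

    #include <assert.h>
    #include <stddef.h>

    /* Simplified stand-ins for the real types. */
    struct iommu_domain   { unsigned int type; };
    struct pt_iommu       { struct iommu_domain domain; const void *driver_ops; };
    struct pt_iommu_amdv1 { struct pt_iommu iommu; };

    struct protection_domain {
    	union {
    		struct iommu_domain domain;
    		struct pt_iommu iommu;
    		struct pt_iommu_amdv1 amdv1;
    	};
    	/* ... driver-private fields follow ... */
    };

    /*
     * Every flavor must place the embedded iommu_domain at the same offset,
     * so container_of(dom, struct protection_domain, domain) (i.e. the
     * driver's to_pdomain()) is valid no matter which page-table format
     * backs the domain.
     */
    static_assert(offsetof(struct protection_domain, domain) ==
    	      offsetof(struct protection_domain, iommu.domain), "layout");
    static_assert(offsetof(struct protection_domain, domain) ==
    	      offsetof(struct protection_domain, amdv1.iommu.domain), "layout");

The real macros in <linux/generic_pt/iommu.h> presumably perform an equivalent check so the same container_of() conversion keeps working for the V1 and V2 table formats.
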
diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c
index 10fa217a7119..20b04996441d 100644
--- a/drivers/iommu/amd/debugfs.c
+++ b/drivers/iommu/amd/debugfs.c
@@ -37,7 +37,7 @@ static ssize_t iommu_mmio_write(struct file *filp, const char __user *ubuf,
if (ret)
return ret;
- if (iommu->dbg_mmio_offset > iommu->mmio_phys_end - 4) {
+ if (iommu->dbg_mmio_offset > iommu->mmio_phys_end - sizeof(u64)) {
iommu->dbg_mmio_offset = -1;
return -EINVAL;
}
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index f2991c11867c..4f4d4955269e 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1710,13 +1710,22 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
if (alloc_dev_table(pci_seg))
- return NULL;
+ goto err_free_pci_seg;
if (alloc_alias_table(pci_seg))
- return NULL;
+ goto err_free_dev_table;
if (alloc_rlookup_table(pci_seg))
- return NULL;
+ goto err_free_alias_table;
return pci_seg;
+
+err_free_alias_table:
+ free_alias_table(pci_seg);
+err_free_dev_table:
+ free_dev_table(pci_seg);
+err_free_pci_seg:
+ list_del(&pci_seg->list);
+ kfree(pci_seg);
+ return NULL;
}
static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
deleted file mode 100644
index 70c2f5b1631b..000000000000
--- a/drivers/iommu/amd/io_pgtable.c
+++ /dev/null
@@ -1,577 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * CPU-agnostic AMD IO page table allocator.
- *
- * Copyright (C) 2020 Advanced Micro Devices, Inc.
- * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
- */
-
-#define pr_fmt(fmt) "AMD-Vi: " fmt
-#define dev_fmt(fmt) pr_fmt(fmt)
-
-#include <linux/atomic.h>
-#include <linux/bitops.h>
-#include <linux/io-pgtable.h>
-#include <linux/kernel.h>
-#include <linux/sizes.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/dma-mapping.h>
-#include <linux/seqlock.h>
-
-#include <asm/barrier.h>
-
-#include "amd_iommu_types.h"
-#include "amd_iommu.h"
-#include "../iommu-pages.h"
-
-/*
- * Helper function to get the first pte of a large mapping
- */
-static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
- unsigned long *count)
-{
- unsigned long pte_mask, pg_size, cnt;
- u64 *fpte;
-
- pg_size = PTE_PAGE_SIZE(*pte);
- cnt = PAGE_SIZE_PTE_COUNT(pg_size);
- pte_mask = ~((cnt << 3) - 1);
- fpte = (u64 *)(((unsigned long)pte) & pte_mask);
-
- if (page_size)
- *page_size = pg_size;
-
- if (count)
- *count = cnt;
-
- return fpte;
-}
-
-static void free_pt_lvl(u64 *pt, struct iommu_pages_list *freelist, int lvl)
-{
- u64 *p;
- int i;
-
- for (i = 0; i < 512; ++i) {
- /* PTE present? */
- if (!IOMMU_PTE_PRESENT(pt[i]))
- continue;
-
- /* Large PTE? */
- if (PM_PTE_LEVEL(pt[i]) == 0 ||
- PM_PTE_LEVEL(pt[i]) == 7)
- continue;
-
- /*
- * Free the next level. No need to look at l1 tables here since
- * they can only contain leaf PTEs; just free them directly.
- */
- p = IOMMU_PTE_PAGE(pt[i]);
- if (lvl > 2)
- free_pt_lvl(p, freelist, lvl - 1);
- else
- iommu_pages_list_add(freelist, p);
- }
-
- iommu_pages_list_add(freelist, pt);
-}
-
-static void free_sub_pt(u64 *root, int mode, struct iommu_pages_list *freelist)
-{
- switch (mode) {
- case PAGE_MODE_NONE:
- case PAGE_MODE_7_LEVEL:
- break;
- case PAGE_MODE_1_LEVEL:
- iommu_pages_list_add(freelist, root);
- break;
- case PAGE_MODE_2_LEVEL:
- case PAGE_MODE_3_LEVEL:
- case PAGE_MODE_4_LEVEL:
- case PAGE_MODE_5_LEVEL:
- case PAGE_MODE_6_LEVEL:
- free_pt_lvl(root, freelist, mode);
- break;
- default:
- BUG();
- }
-}
-
-/*
- * This function is used to add another level to an IO page table. Adding
- * another level increases the size of the address space by 9 bits to a size up
- * to 64 bits.
- */
-static bool increase_address_space(struct amd_io_pgtable *pgtable,
- unsigned long address,
- unsigned int page_size_level,
- gfp_t gfp)
-{
- struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
- struct protection_domain *domain =
- container_of(pgtable, struct protection_domain, iop);
- unsigned long flags;
- bool ret = true;
- u64 *pte;
-
- pte = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp, SZ_4K);
- if (!pte)
- return false;
-
- spin_lock_irqsave(&domain->lock, flags);
-
- if (address <= PM_LEVEL_SIZE(pgtable->mode) &&
- pgtable->mode - 1 >= page_size_level)
- goto out;
-
- ret = false;
- if (WARN_ON_ONCE(pgtable->mode == amd_iommu_hpt_level))
- goto out;
-
- *pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));
-
- write_seqcount_begin(&pgtable->seqcount);
- pgtable->root = pte;
- pgtable->mode += 1;
- write_seqcount_end(&pgtable->seqcount);
-
- amd_iommu_update_and_flush_device_table(domain);
-
- pte = NULL;
- ret = true;
-
-out:
- spin_unlock_irqrestore(&domain->lock, flags);
- iommu_free_pages(pte);
-
- return ret;
-}
-
-static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
- unsigned long address,
- unsigned long page_size,
- u64 **pte_page,
- gfp_t gfp,
- bool *updated)
-{
- unsigned long last_addr = address + (page_size - 1);
- struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
- unsigned int seqcount;
- int level, end_lvl;
- u64 *pte, *page;
-
- BUG_ON(!is_power_of_2(page_size));
-
- while (last_addr > PM_LEVEL_SIZE(pgtable->mode) ||
- pgtable->mode - 1 < PAGE_SIZE_LEVEL(page_size)) {
- /*
- * Return an error if there is no memory to update the
- * page-table.
- */
- if (!increase_address_space(pgtable, last_addr,
- PAGE_SIZE_LEVEL(page_size), gfp))
- return NULL;
- }
-
-
- do {
- seqcount = read_seqcount_begin(&pgtable->seqcount);
-
- level = pgtable->mode - 1;
- pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
- } while (read_seqcount_retry(&pgtable->seqcount, seqcount));
-
-
- address = PAGE_SIZE_ALIGN(address, page_size);
- end_lvl = PAGE_SIZE_LEVEL(page_size);
-
- while (level > end_lvl) {
- u64 __pte, __npte;
- int pte_level;
-
- __pte = *pte;
- pte_level = PM_PTE_LEVEL(__pte);
-
- /*
- * If we replace a series of large PTEs, we need
- * to tear down all of them.
- */
- if (IOMMU_PTE_PRESENT(__pte) &&
- pte_level == PAGE_MODE_7_LEVEL) {
- unsigned long count, i;
- u64 *lpte;
-
- lpte = first_pte_l7(pte, NULL, &count);
-
- /*
- * Unmap the replicated PTEs that still match the
- * original large mapping
- */
- for (i = 0; i < count; ++i)
- cmpxchg64(&lpte[i], __pte, 0ULL);
-
- *updated = true;
- continue;
- }
-
- if (!IOMMU_PTE_PRESENT(__pte) ||
- pte_level == PAGE_MODE_NONE) {
- page = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp,
- SZ_4K);
-
- if (!page)
- return NULL;
-
- __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
-
- /* pte could have been changed somewhere. */
- if (!try_cmpxchg64(pte, &__pte, __npte))
- iommu_free_pages(page);
- else if (IOMMU_PTE_PRESENT(__pte))
- *updated = true;
-
- continue;
- }
-
- /* No level skipping support yet */
- if (pte_level != level)
- return NULL;
-
- level -= 1;
-
- pte = IOMMU_PTE_PAGE(__pte);
-
- if (pte_page && level == end_lvl)
- *pte_page = pte;
-
- pte = &pte[PM_LEVEL_INDEX(level, address)];
- }
-
- return pte;
-}
-
-/*
- * This function checks if there is a PTE for a given dma address. If
- * there is one, it returns the pointer to it.
- */
-static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
- unsigned long address,
- unsigned long *page_size)
-{
- int level;
- unsigned int seqcount;
- u64 *pte;
-
- *page_size = 0;
-
- if (address > PM_LEVEL_SIZE(pgtable->mode))
- return NULL;
-
- do {
- seqcount = read_seqcount_begin(&pgtable->seqcount);
- level = pgtable->mode - 1;
- pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
- } while (read_seqcount_retry(&pgtable->seqcount, seqcount));
-
- *page_size = PTE_LEVEL_PAGE_SIZE(level);
-
- while (level > 0) {
-
- /* Not Present */
- if (!IOMMU_PTE_PRESENT(*pte))
- return NULL;
-
- /* Large PTE */
- if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL ||
- PM_PTE_LEVEL(*pte) == PAGE_MODE_NONE)
- break;
-
- /* No level skipping support yet */
- if (PM_PTE_LEVEL(*pte) != level)
- return NULL;
-
- level -= 1;
-
- /* Walk to the next level */
- pte = IOMMU_PTE_PAGE(*pte);
- pte = &pte[PM_LEVEL_INDEX(level, address)];
- *page_size = PTE_LEVEL_PAGE_SIZE(level);
- }
-
- /*
- * If we have a series of large PTEs, make
- * sure to return a pointer to the first one.
- */
- if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
- pte = first_pte_l7(pte, page_size, NULL);
-
- return pte;
-}
-
-static void free_clear_pte(u64 *pte, u64 pteval,
- struct iommu_pages_list *freelist)
-{
- u64 *pt;
- int mode;
-
- while (!try_cmpxchg64(pte, &pteval, 0))
- pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
-
- if (!IOMMU_PTE_PRESENT(pteval))
- return;
-
- pt = IOMMU_PTE_PAGE(pteval);
- mode = IOMMU_PTE_MODE(pteval);
-
- free_sub_pt(pt, mode, freelist);
-}
-
-/*
- * Generic mapping functions. It maps a physical address into a DMA
- * address space. It allocates the page table pages if necessary.
- * In the future it can be extended to a generic mapping function
- * supporting all features of AMD IOMMU page tables like level skipping
- * and full 64 bit address spaces.
- */
-static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
- phys_addr_t paddr, size_t pgsize, size_t pgcount,
- int prot, gfp_t gfp, size_t *mapped)
-{
- struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
- struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
- bool updated = false;
- u64 __pte, *pte;
- int ret, i, count;
- size_t size = pgcount << __ffs(pgsize);
- unsigned long o_iova = iova;
-
- BUG_ON(!IS_ALIGNED(iova, pgsize));
- BUG_ON(!IS_ALIGNED(paddr, pgsize));
-
- ret = -EINVAL;
- if (!(prot & IOMMU_PROT_MASK))
- goto out;
-
- while (pgcount > 0) {
- count = PAGE_SIZE_PTE_COUNT(pgsize);
- pte = alloc_pte(pgtable, iova, pgsize, NULL, gfp, &updated);
-
- ret = -ENOMEM;
- if (!pte)
- goto out;
-
- for (i = 0; i < count; ++i)
- free_clear_pte(&pte[i], pte[i], &freelist);
-
- if (!iommu_pages_list_empty(&freelist))
- updated = true;
-
- if (count > 1) {
- __pte = PAGE_SIZE_PTE(__sme_set(paddr), pgsize);
- __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
- } else
- __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
-
- if (prot & IOMMU_PROT_IR)
- __pte |= IOMMU_PTE_IR;
- if (prot & IOMMU_PROT_IW)
- __pte |= IOMMU_PTE_IW;
-
- for (i = 0; i < count; ++i)
- pte[i] = __pte;
-
- iova += pgsize;
- paddr += pgsize;
- pgcount--;
- if (mapped)
- *mapped += pgsize;
- }
-
- ret = 0;
-
-out:
- if (updated) {
- struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
- unsigned long flags;
-
- spin_lock_irqsave(&dom->lock, flags);
- /*
- * Flush domain TLB(s) and wait for completion. Any Device-Table
- * Updates and flushing already happened in
- * increase_address_space().
- */
- amd_iommu_domain_flush_pages(dom, o_iova, size);
- spin_unlock_irqrestore(&dom->lock, flags);
- }
-
- /* Everything flushed out, free pages now */
- iommu_put_pages_list(&freelist);
-
- return ret;
-}
-
-static unsigned long iommu_v1_unmap_pages(struct io_pgtable_ops *ops,
- unsigned long iova,
- size_t pgsize, size_t pgcount,
- struct iommu_iotlb_gather *gather)
-{
- struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
- unsigned long long unmapped;
- unsigned long unmap_size;
- u64 *pte;
- size_t size = pgcount << __ffs(pgsize);
-
- BUG_ON(!is_power_of_2(pgsize));
-
- unmapped = 0;
-
- while (unmapped < size) {
- pte = fetch_pte(pgtable, iova, &unmap_size);
- if (pte) {
- int i, count;
-
- count = PAGE_SIZE_PTE_COUNT(unmap_size);
- for (i = 0; i < count; i++)
- pte[i] = 0ULL;
- } else {
- return unmapped;
- }
-
- iova = (iova & ~(unmap_size - 1)) + unmap_size;
- unmapped += unmap_size;
- }
-
- return unmapped;
-}
-
-static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
-{
- struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
- unsigned long offset_mask, pte_pgsize;
- u64 *pte, __pte;
-
- pte = fetch_pte(pgtable, iova, &pte_pgsize);
-
- if (!pte || !IOMMU_PTE_PRESENT(*pte))
- return 0;
-
- offset_mask = pte_pgsize - 1;
- __pte = __sme_clr(*pte & PM_ADDR_MASK);
-
- return (__pte & ~offset_mask) | (iova & offset_mask);
-}
-
-static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size,
- unsigned long flags)
-{
- bool test_only = flags & IOMMU_DIRTY_NO_CLEAR;
- bool dirty = false;
- int i, count;
-
- /*
- * 2.2.3.2 Host Dirty Support
- * When a non-default page size is used , software must OR the
- * Dirty bits in all of the replicated host PTEs used to map
- * the page. The IOMMU does not guarantee the Dirty bits are
- * set in all of the replicated PTEs. Any portion of the page
- * may have been written even if the Dirty bit is set in only
- * one of the replicated PTEs.
- */
- count = PAGE_SIZE_PTE_COUNT(size);
- for (i = 0; i < count && test_only; i++) {
- if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *)&ptep[i])) {
- dirty = true;
- break;
- }
- }
-
- for (i = 0; i < count && !test_only; i++) {
- if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
- (unsigned long *)&ptep[i])) {
- dirty = true;
- }
- }
-
- return dirty;
-}
-
-static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
- unsigned long iova, size_t size,
- unsigned long flags,
- struct iommu_dirty_bitmap *dirty)
-{
- struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
- unsigned long end = iova + size - 1;
-
- do {
- unsigned long pgsize = 0;
- u64 *ptep, pte;
-
- ptep = fetch_pte(pgtable, iova, &pgsize);
- if (ptep)
- pte = READ_ONCE(*ptep);
- if (!ptep || !IOMMU_PTE_PRESENT(pte)) {
- pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0);
- iova += pgsize;
- continue;
- }
-
- /*
- * Mark the whole IOVA range as dirty even if only one of
- * the replicated PTEs were marked dirty.
- */
- if (pte_test_and_clear_dirty(ptep, pgsize, flags))
- iommu_dirty_bitmap_record(dirty, iova, pgsize);
- iova += pgsize;
- } while (iova < end);
-
- return 0;
-}
-
-/*
- * ----------------------------------------------------
- */
-static void v1_free_pgtable(struct io_pgtable *iop)
-{
- struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
- struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
-
- if (pgtable->mode == PAGE_MODE_NONE)
- return;
-
- /* Page-table is not visible to IOMMU anymore, so free it */
- BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
- pgtable->mode > amd_iommu_hpt_level);
-
- free_sub_pt(pgtable->root, pgtable->mode, &freelist);
- iommu_put_pages_list(&freelist);
-}
-
-static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
-{
- struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
-
- pgtable->root =
- iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K);
- if (!pgtable->root)
- return NULL;
- pgtable->mode = PAGE_MODE_3_LEVEL;
- seqcount_init(&pgtable->seqcount);
-
- cfg->pgsize_bitmap = amd_iommu_pgsize_bitmap;
- cfg->ias = IOMMU_IN_ADDR_BIT_SIZE;
- cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE;
-
- pgtable->pgtbl.ops.map_pages = iommu_v1_map_pages;
- pgtable->pgtbl.ops.unmap_pages = iommu_v1_unmap_pages;
- pgtable->pgtbl.ops.iova_to_phys = iommu_v1_iova_to_phys;
- pgtable->pgtbl.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
-
- return &pgtable->pgtbl;
-}
-
-struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
- .alloc = v1_alloc_pgtable,
- .free = v1_free_pgtable,
-};
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
deleted file mode 100644
index b47941353ccb..000000000000
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ /dev/null
@@ -1,370 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * CPU-agnostic AMD IO page table v2 allocator.
- *
- * Copyright (C) 2022, 2023 Advanced Micro Devices, Inc.
- * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
- * Author: Vasant Hegde <vasant.hegde@amd.com>
- */
-
-#define pr_fmt(fmt) "AMD-Vi: " fmt
-#define dev_fmt(fmt) pr_fmt(fmt)
-
-#include <linux/bitops.h>
-#include <linux/io-pgtable.h>
-#include <linux/kernel.h>
-
-#include <asm/barrier.h>
-
-#include "amd_iommu_types.h"
-#include "amd_iommu.h"
-#include "../iommu-pages.h"
-
-#define IOMMU_PAGE_PRESENT BIT_ULL(0) /* Is present */
-#define IOMMU_PAGE_RW BIT_ULL(1) /* Writeable */
-#define IOMMU_PAGE_USER BIT_ULL(2) /* Userspace addressable */
-#define IOMMU_PAGE_PWT BIT_ULL(3) /* Page write through */
-#define IOMMU_PAGE_PCD BIT_ULL(4) /* Page cache disabled */
-#define IOMMU_PAGE_ACCESS BIT_ULL(5) /* Was accessed (updated by IOMMU) */
-#define IOMMU_PAGE_DIRTY BIT_ULL(6) /* Was written to (updated by IOMMU) */
-#define IOMMU_PAGE_PSE BIT_ULL(7) /* Page Size Extensions */
-#define IOMMU_PAGE_NX BIT_ULL(63) /* No execute */
-
-#define MAX_PTRS_PER_PAGE 512
-
-#define IOMMU_PAGE_SIZE_2M BIT_ULL(21)
-#define IOMMU_PAGE_SIZE_1G BIT_ULL(30)
-
-
-static inline int get_pgtable_level(void)
-{
- return amd_iommu_gpt_level;
-}
-
-static inline bool is_large_pte(u64 pte)
-{
- return (pte & IOMMU_PAGE_PSE);
-}
-
-static inline u64 set_pgtable_attr(u64 *page)
-{
- u64 prot;
-
- prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER;
- prot |= IOMMU_PAGE_ACCESS;
-
- return (iommu_virt_to_phys(page) | prot);
-}
-
-static inline void *get_pgtable_pte(u64 pte)
-{
- return iommu_phys_to_virt(pte & PM_ADDR_MASK);
-}
-
-static u64 set_pte_attr(u64 paddr, u64 pg_size, int prot)
-{
- u64 pte;
-
- pte = __sme_set(paddr & PM_ADDR_MASK);
- pte |= IOMMU_PAGE_PRESENT | IOMMU_PAGE_USER;
- pte |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY;
-
- if (prot & IOMMU_PROT_IW)
- pte |= IOMMU_PAGE_RW;
-
- /* Large page */
- if (pg_size == IOMMU_PAGE_SIZE_1G || pg_size == IOMMU_PAGE_SIZE_2M)
- pte |= IOMMU_PAGE_PSE;
-
- return pte;
-}
-
-static inline u64 get_alloc_page_size(u64 size)
-{
- if (size >= IOMMU_PAGE_SIZE_1G)
- return IOMMU_PAGE_SIZE_1G;
-
- if (size >= IOMMU_PAGE_SIZE_2M)
- return IOMMU_PAGE_SIZE_2M;
-
- return PAGE_SIZE;
-}
-
-static inline int page_size_to_level(u64 pg_size)
-{
- if (pg_size == IOMMU_PAGE_SIZE_1G)
- return PAGE_MODE_3_LEVEL;
- if (pg_size == IOMMU_PAGE_SIZE_2M)
- return PAGE_MODE_2_LEVEL;
-
- return PAGE_MODE_1_LEVEL;
-}
-
-static void free_pgtable(u64 *pt, int level)
-{
- u64 *p;
- int i;
-
- for (i = 0; i < MAX_PTRS_PER_PAGE; i++) {
- /* PTE present? */
- if (!IOMMU_PTE_PRESENT(pt[i]))
- continue;
-
- if (is_large_pte(pt[i]))
- continue;
-
- /*
- * Free the next level. No need to look at l1 tables here since
- * they can only contain leaf PTEs; just free them directly.
- */
- p = get_pgtable_pte(pt[i]);
- if (level > 2)
- free_pgtable(p, level - 1);
- else
- iommu_free_pages(p);
- }
-
- iommu_free_pages(pt);
-}
-
-/* Allocate page table */
-static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova,
- unsigned long pg_size, gfp_t gfp, bool *updated)
-{
- u64 *pte, *page;
- int level, end_level;
-
- level = get_pgtable_level() - 1;
- end_level = page_size_to_level(pg_size);
- pte = &pgd[PM_LEVEL_INDEX(level, iova)];
- iova = PAGE_SIZE_ALIGN(iova, PAGE_SIZE);
-
- while (level >= end_level) {
- u64 __pte, __npte;
-
- __pte = *pte;
-
- if (IOMMU_PTE_PRESENT(__pte) && is_large_pte(__pte)) {
- /* Unmap large pte */
- cmpxchg64(pte, *pte, 0ULL);
- *updated = true;
- continue;
- }
-
- if (!IOMMU_PTE_PRESENT(__pte)) {
- page = iommu_alloc_pages_node_sz(nid, gfp, SZ_4K);
- if (!page)
- return NULL;
-
- __npte = set_pgtable_attr(page);
- /* pte could have been changed somewhere. */
- if (!try_cmpxchg64(pte, &__pte, __npte))
- iommu_free_pages(page);
- else if (IOMMU_PTE_PRESENT(__pte))
- *updated = true;
-
- continue;
- }
-
- level -= 1;
- pte = get_pgtable_pte(__pte);
- pte = &pte[PM_LEVEL_INDEX(level, iova)];
- }
-
- /* Tear down existing pte entries */
- if (IOMMU_PTE_PRESENT(*pte)) {
- u64 *__pte;
-
- *updated = true;
- __pte = get_pgtable_pte(*pte);
- cmpxchg64(pte, *pte, 0ULL);
- if (pg_size == IOMMU_PAGE_SIZE_1G)
- free_pgtable(__pte, end_level - 1);
- else if (pg_size == IOMMU_PAGE_SIZE_2M)
- iommu_free_pages(__pte);
- }
-
- return pte;
-}
-
-/*
- * This function checks if there is a PTE for a given dma address.
- * If there is one, it returns the pointer to it.
- */
-static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
- unsigned long iova, unsigned long *page_size)
-{
- u64 *pte;
- int level;
-
- level = get_pgtable_level() - 1;
- pte = &pgtable->pgd[PM_LEVEL_INDEX(level, iova)];
- /* Default page size is 4K */
- *page_size = PAGE_SIZE;
-
- while (level) {
- /* Not present */
- if (!IOMMU_PTE_PRESENT(*pte))
- return NULL;
-
- /* Walk to the next level */
- pte = get_pgtable_pte(*pte);
- pte = &pte[PM_LEVEL_INDEX(level - 1, iova)];
-
- /* Large page */
- if (is_large_pte(*pte)) {
- if (level == PAGE_MODE_3_LEVEL)
- *page_size = IOMMU_PAGE_SIZE_1G;
- else if (level == PAGE_MODE_2_LEVEL)
- *page_size = IOMMU_PAGE_SIZE_2M;
- else
- return NULL; /* Wrongly set PSE bit in PTE */
-
- break;
- }
-
- level -= 1;
- }
-
- return pte;
-}
-
-static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
- phys_addr_t paddr, size_t pgsize, size_t pgcount,
- int prot, gfp_t gfp, size_t *mapped)
-{
- struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
- struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
- u64 *pte;
- unsigned long map_size;
- unsigned long mapped_size = 0;
- unsigned long o_iova = iova;
- size_t size = pgcount << __ffs(pgsize);
- int ret = 0;
- bool updated = false;
-
- if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize) || !pgcount)
- return -EINVAL;
-
- if (!(prot & IOMMU_PROT_MASK))
- return -EINVAL;
-
- while (mapped_size < size) {
- map_size = get_alloc_page_size(pgsize);
- pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd,
- iova, map_size, gfp, &updated);
- if (!pte) {
- ret = -ENOMEM;
- goto out;
- }
-
- *pte = set_pte_attr(paddr, map_size, prot);
-
- iova += map_size;
- paddr += map_size;
- mapped_size += map_size;
- }
-
-out:
- if (updated) {
- struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
- unsigned long flags;
-
- spin_lock_irqsave(&pdom->lock, flags);
- amd_iommu_domain_flush_pages(pdom, o_iova, size);
- spin_unlock_irqrestore(&pdom->lock, flags);
- }
-
- if (mapped)
- *mapped += mapped_size;
-
- return ret;
-}
-
-static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops,
- unsigned long iova,
- size_t pgsize, size_t pgcount,
- struct iommu_iotlb_gather *gather)
-{
- struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
- struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
- unsigned long unmap_size;
- unsigned long unmapped = 0;
- size_t size = pgcount << __ffs(pgsize);
- u64 *pte;
-
- if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
- return 0;
-
- while (unmapped < size) {
- pte = fetch_pte(pgtable, iova, &unmap_size);
- if (!pte)
- return unmapped;
-
- *pte = 0ULL;
-
- iova = (iova & ~(unmap_size - 1)) + unmap_size;
- unmapped += unmap_size;
- }
-
- return unmapped;
-}
-
-static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
-{
- struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
- unsigned long offset_mask, pte_pgsize;
- u64 *pte, __pte;
-
- pte = fetch_pte(pgtable, iova, &pte_pgsize);
- if (!pte || !IOMMU_PTE_PRESENT(*pte))
- return 0;
-
- offset_mask = pte_pgsize - 1;
- __pte = __sme_clr(*pte & PM_ADDR_MASK);
-
- return (__pte & ~offset_mask) | (iova & offset_mask);
-}
-
-/*
- * ----------------------------------------------------
- */
-static void v2_free_pgtable(struct io_pgtable *iop)
-{
- struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
-
- if (!pgtable || !pgtable->pgd)
- return;
-
- /* Free page table */
- free_pgtable(pgtable->pgd, get_pgtable_level());
- pgtable->pgd = NULL;
-}
-
-static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
-{
- struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
- int ias = IOMMU_IN_ADDR_BIT_SIZE;
-
- pgtable->pgd = iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K);
- if (!pgtable->pgd)
- return NULL;
-
- if (get_pgtable_level() == PAGE_MODE_5_LEVEL)
- ias = 57;
-
- pgtable->pgtbl.ops.map_pages = iommu_v2_map_pages;
- pgtable->pgtbl.ops.unmap_pages = iommu_v2_unmap_pages;
- pgtable->pgtbl.ops.iova_to_phys = iommu_v2_iova_to_phys;
-
- cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
- cfg->ias = ias;
- cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE;
-
- return &pgtable->pgtbl;
-}
-
-struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = {
- .alloc = v2_alloc_pgtable,
- .free = v2_free_pgtable,
-};
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 2e1865daa1ce..9f1d56a5e145 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -30,7 +30,6 @@
#include <linux/msi.h>
#include <linux/irqdomain.h>
#include <linux/percpu.h>
-#include <linux/io-pgtable.h>
#include <linux/cc_platform.h>
#include <asm/irq_remapping.h>
#include <asm/io_apic.h>
@@ -41,9 +40,9 @@
#include <asm/gart.h>
#include <asm/dma.h>
#include <uapi/linux/iommufd.h>
+#include <linux/generic_pt/iommu.h>
#include "amd_iommu.h"
-#include "../dma-iommu.h"
#include "../irq_remapping.h"
#include "../iommu-pages.h"
@@ -60,7 +59,6 @@ LIST_HEAD(hpet_map);
LIST_HEAD(acpihid_map);
const struct iommu_ops amd_iommu_ops;
-static const struct iommu_dirty_ops amd_dirty_ops;
int amd_iommu_max_glx_val = -1;
@@ -70,15 +68,22 @@ int amd_iommu_max_glx_val = -1;
*/
DEFINE_IDA(pdom_ids);
-static int amd_iommu_attach_device(struct iommu_domain *dom,
- struct device *dev);
+static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev,
+ struct iommu_domain *old);
static void set_dte_entry(struct amd_iommu *iommu,
- struct iommu_dev_data *dev_data);
+ struct iommu_dev_data *dev_data,
+ phys_addr_t top_paddr, unsigned int top_level);
+
+static void amd_iommu_change_top(struct pt_iommu *iommu_table,
+ phys_addr_t top_paddr, unsigned int top_level);
static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid);
static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid);
+static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain);
+static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
+ bool enable);
/****************************************************************************
*
@@ -1157,6 +1162,25 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
*
****************************************************************************/
+static void dump_command_buffer(struct amd_iommu *iommu)
+{
+ struct iommu_cmd *cmd;
+ u32 head, tail;
+ int i;
+
+ head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+ tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+
+ pr_err("CMD Buffer head=%llu tail=%llu\n", MMIO_CMD_BUFFER_HEAD(head),
+ MMIO_CMD_BUFFER_TAIL(tail));
+
+ for (i = 0; i < CMD_BUFFER_ENTRIES; i++) {
+ cmd = (struct iommu_cmd *)(iommu->cmd_buf + i * sizeof(*cmd));
+ pr_err("%3d: %08x %08x %08x %08x\n", i, cmd->data[0], cmd->data[1], cmd->data[2],
+ cmd->data[3]);
+ }
+}
+
static int wait_on_sem(struct amd_iommu *iommu, u64 data)
{
int i = 0;
@@ -1167,7 +1191,14 @@ static int wait_on_sem(struct amd_iommu *iommu, u64 data)
}
if (i == LOOP_TIMEOUT) {
- pr_alert("Completion-Wait loop timed out\n");
+
+ pr_alert("IOMMU %04x:%02x:%02x.%01x: Completion-Wait loop timed out\n",
+ iommu->pci_seg->id, PCI_BUS_NUM(iommu->devid),
+ PCI_SLOT(iommu->devid), PCI_FUNC(iommu->devid));
+
+ if (amd_iommu_dump)
+ DO_ONCE_LITE(dump_command_buffer, iommu);
+
return -EIO;
}
@@ -1756,42 +1787,6 @@ static void dev_flush_pasid_all(struct iommu_dev_data *dev_data,
CMD_INV_IOMMU_ALL_PAGES_ADDRESS);
}
-/* Flush the not present cache if it exists */
-static void domain_flush_np_cache(struct protection_domain *domain,
- dma_addr_t iova, size_t size)
-{
- if (unlikely(amd_iommu_np_cache)) {
- unsigned long flags;
-
- spin_lock_irqsave(&domain->lock, flags);
- amd_iommu_domain_flush_pages(domain, iova, size);
- spin_unlock_irqrestore(&domain->lock, flags);
- }
-}
-
-
-/*
- * This function flushes the DTEs for all devices in domain
- */
-void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
-{
- struct iommu_dev_data *dev_data;
-
- lockdep_assert_held(&domain->lock);
-
- list_for_each_entry(dev_data, &domain->dev_list, list) {
- struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev);
-
- set_dte_entry(iommu, dev_data);
- clone_aliases(iommu, dev_data->dev);
- }
-
- list_for_each_entry(dev_data, &domain->dev_list, list)
- device_flush_dte(dev_data);
-
- domain_flush_complete(domain);
-}
-
int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag)
{
struct iommu_dev_data *dev_data;
@@ -2051,7 +2046,8 @@ static void set_dte_gcr3_table(struct amd_iommu *iommu,
}
static void set_dte_entry(struct amd_iommu *iommu,
- struct iommu_dev_data *dev_data)
+ struct iommu_dev_data *dev_data,
+ phys_addr_t top_paddr, unsigned int top_level)
{
u16 domid;
u32 old_domid;
@@ -2060,19 +2056,36 @@ static void set_dte_entry(struct amd_iommu *iommu,
struct protection_domain *domain = dev_data->domain;
struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
+ struct pt_iommu_amdv1_hw_info pt_info;
+
+ make_clear_dte(dev_data, dte, &new);
if (gcr3_info && gcr3_info->gcr3_tbl)
domid = dev_data->gcr3_info.domid;
- else
+ else {
domid = domain->id;
- make_clear_dte(dev_data, dte, &new);
-
- if (domain->iop.mode != PAGE_MODE_NONE)
- new.data[0] |= iommu_virt_to_phys(domain->iop.root);
+ if (domain->domain.type & __IOMMU_DOMAIN_PAGING) {
+ /*
+ * When updating the IO pagetable, the new top and level
+ * are provided as parameters. For other operations i.e.
+ * device attach, retrieve the current pagetable info
+ * via the IOMMU PT API.
+ */
+ if (top_paddr) {
+ pt_info.host_pt_root = top_paddr;
+ pt_info.mode = top_level + 1;
+ } else {
+ WARN_ON(top_paddr || top_level);
+ pt_iommu_amdv1_hw_info(&domain->amdv1,
+ &pt_info);
+ }
- new.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
- << DEV_ENTRY_MODE_SHIFT;
+ new.data[0] |= __sme_set(pt_info.host_pt_root) |
+ (pt_info.mode & DEV_ENTRY_MODE_MASK)
+ << DEV_ENTRY_MODE_SHIFT;
+ }
+ }
new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW;
@@ -2138,7 +2151,7 @@ static void dev_update_dte(struct iommu_dev_data *dev_data, bool set)
struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev);
if (set)
- set_dte_entry(iommu, dev_data);
+ set_dte_entry(iommu, dev_data, 0, 0);
else
clear_dte_entry(iommu, dev_data);
@@ -2156,6 +2169,7 @@ static int init_gcr3_table(struct iommu_dev_data *dev_data,
{
struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
int max_pasids = dev_data->max_pasids;
+ struct pt_iommu_x86_64_hw_info pt_info;
int ret = 0;
/*
@@ -2178,7 +2192,8 @@ static int init_gcr3_table(struct iommu_dev_data *dev_data,
if (!pdom_is_v2_pgtbl_mode(pdom))
return ret;
- ret = update_gcr3(dev_data, 0, iommu_virt_to_phys(pdom->iop.pgd), true);
+ pt_iommu_x86_64_hw_info(&pdom->amdv2, &pt_info);
+ ret = update_gcr3(dev_data, 0, __sme_set(pt_info.gcr3_pt), true);
if (ret)
free_gcr3_table(&dev_data->gcr3_info);
@@ -2500,94 +2515,240 @@ struct protection_domain *protection_domain_alloc(void)
return domain;
}
-static int pdom_setup_pgtable(struct protection_domain *domain,
- struct device *dev)
+static bool amd_iommu_hd_support(struct amd_iommu *iommu)
+{
+ if (amd_iommu_hatdis)
+ return false;
+
+ return iommu && (iommu->features & FEATURE_HDSUP);
+}
+
+static spinlock_t *amd_iommu_get_top_lock(struct pt_iommu *iommupt)
{
- struct io_pgtable_ops *pgtbl_ops;
- enum io_pgtable_fmt fmt;
+ struct protection_domain *pdom =
+ container_of(iommupt, struct protection_domain, iommu);
- switch (domain->pd_mode) {
- case PD_MODE_V1:
- fmt = AMD_IOMMU_V1;
- break;
- case PD_MODE_V2:
- fmt = AMD_IOMMU_V2;
- break;
- case PD_MODE_NONE:
- WARN_ON_ONCE(1);
- return -EPERM;
+ return &pdom->lock;
+}
+
+/*
+ * Update all HW references to the domain with a new pgtable configuration.
+ */
+static void amd_iommu_change_top(struct pt_iommu *iommu_table,
+ phys_addr_t top_paddr, unsigned int top_level)
+{
+ struct protection_domain *pdom =
+ container_of(iommu_table, struct protection_domain, iommu);
+ struct iommu_dev_data *dev_data;
+
+ lockdep_assert_held(&pdom->lock);
+
+ /* Update the DTE for all devices attached to this domain */
+ list_for_each_entry(dev_data, &pdom->dev_list, list) {
+ struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev);
+
+ /* Update the HW references with the new level and top ptr */
+ set_dte_entry(iommu, dev_data, top_paddr, top_level);
+ clone_aliases(iommu, dev_data->dev);
}
- domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev);
- pgtbl_ops = alloc_io_pgtable_ops(fmt, &domain->iop.pgtbl.cfg, domain);
- if (!pgtbl_ops)
- return -ENOMEM;
+ list_for_each_entry(dev_data, &pdom->dev_list, list)
+ device_flush_dte(dev_data);
+
+ domain_flush_complete(pdom);
+}
+
+/*
+ * amd_iommu_iotlb_sync_map() is used to generate flushes for non-present to
+ * present (ie mapping) operations. It is a NOP if the IOMMU doesn't have non
+ * present caching (like hypervisor shadowing).
+ */
+static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
+ unsigned long iova, size_t size)
+{
+ struct protection_domain *domain = to_pdomain(dom);
+ unsigned long flags;
+ if (likely(!amd_iommu_np_cache))
+ return 0;
+
+ spin_lock_irqsave(&domain->lock, flags);
+ amd_iommu_domain_flush_pages(domain, iova, size);
+ spin_unlock_irqrestore(&domain->lock, flags);
return 0;
}
-static inline u64 dma_max_address(enum protection_domain_mode pgtable)
+static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
- if (pgtable == PD_MODE_V1)
- return PM_LEVEL_SIZE(amd_iommu_hpt_level);
+ struct protection_domain *dom = to_pdomain(domain);
+ unsigned long flags;
- /*
- * V2 with 4/5 level page table. Note that "2.2.6.5 AMD64 4-Kbyte Page
- * Translation" shows that the V2 table sign extends the top of the
- * address space creating a reserved region in the middle of the
- * translation, just like the CPU does. Further Vasant says the docs are
- * incomplete and this only applies to non-zero PASIDs. If the AMDv2
- * page table is assigned to the 0 PASID then there is no sign extension
- * check.
- *
- * Since the IOMMU must have a fixed geometry, and the core code does
- * not understand sign extended addressing, we have to chop off the high
- * bit to get consistent behavior with attachments of the domain to any
- * PASID.
- */
- return ((1ULL << (PM_LEVEL_SHIFT(amd_iommu_gpt_level) - 1)) - 1);
+ spin_lock_irqsave(&dom->lock, flags);
+ amd_iommu_domain_flush_all(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
}
-static bool amd_iommu_hd_support(struct amd_iommu *iommu)
+static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather)
{
- if (amd_iommu_hatdis)
- return false;
+ struct protection_domain *dom = to_pdomain(domain);
+ unsigned long flags;
- return iommu && (iommu->features & FEATURE_HDSUP);
+ spin_lock_irqsave(&dom->lock, flags);
+ amd_iommu_domain_flush_pages(dom, gather->start,
+ gather->end - gather->start + 1);
+ spin_unlock_irqrestore(&dom->lock, flags);
+ iommu_put_pages_list(&gather->freelist);
}
-static struct iommu_domain *
-do_iommu_domain_alloc(struct device *dev, u32 flags,
- enum protection_domain_mode pgtable)
+static const struct pt_iommu_driver_ops amd_hw_driver_ops_v1 = {
+ .get_top_lock = amd_iommu_get_top_lock,
+ .change_top = amd_iommu_change_top,
+};
+
+static const struct iommu_domain_ops amdv1_ops = {
+ IOMMU_PT_DOMAIN_OPS(amdv1),
+ .iotlb_sync_map = amd_iommu_iotlb_sync_map,
+ .flush_iotlb_all = amd_iommu_flush_iotlb_all,
+ .iotlb_sync = amd_iommu_iotlb_sync,
+ .attach_dev = amd_iommu_attach_device,
+ .free = amd_iommu_domain_free,
+ .enforce_cache_coherency = amd_iommu_enforce_cache_coherency,
+};
+
+static const struct iommu_dirty_ops amdv1_dirty_ops = {
+ IOMMU_PT_DIRTY_OPS(amdv1),
+ .set_dirty_tracking = amd_iommu_set_dirty_tracking,
+};
+
+static struct iommu_domain *amd_iommu_domain_alloc_paging_v1(struct device *dev,
+ u32 flags)
{
- bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
- struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
+ struct pt_iommu_amdv1_cfg cfg = {};
struct protection_domain *domain;
int ret;
+ if (amd_iommu_hatdis)
+ return ERR_PTR(-EOPNOTSUPP);
+
domain = protection_domain_alloc();
if (!domain)
return ERR_PTR(-ENOMEM);
- domain->pd_mode = pgtable;
- ret = pdom_setup_pgtable(domain, dev);
+ domain->pd_mode = PD_MODE_V1;
+ domain->iommu.driver_ops = &amd_hw_driver_ops_v1;
+ domain->iommu.nid = dev_to_node(dev);
+ if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
+ domain->domain.dirty_ops = &amdv1_dirty_ops;
+
+ /*
+ * Someday FORCE_COHERENCE should be set by
+ * amd_iommu_enforce_cache_coherency() like VT-d does.
+ */
+ cfg.common.features = BIT(PT_FEAT_DYNAMIC_TOP) |
+ BIT(PT_FEAT_AMDV1_ENCRYPT_TABLES) |
+ BIT(PT_FEAT_AMDV1_FORCE_COHERENCE);
+
+ /*
+ * AMD's IOMMU can flush as many pages as necessary in a single flush.
+ * Unless we run in a virtual machine, which can be inferred according
+ * to whether "non-present cache" is on, it is probably best to prefer
+ * (potentially) too extensive TLB flushing (i.e., more misses) over
+ * multiple TLB flushes (i.e., more flushes). For virtual machines the
+ * hypervisor needs to synchronize the host IOMMU PTEs with those of
+ * the guest, and the trade-off is different: unnecessary TLB flushes
+ * should be avoided.
+ */
+ if (amd_iommu_np_cache)
+ cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE_NO_GAPS);
+ else
+ cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE);
+
+ cfg.common.hw_max_vasz_lg2 =
+ min(64, (amd_iommu_hpt_level - 1) * 9 + 21);
+ cfg.common.hw_max_oasz_lg2 = 52;
+ cfg.starting_level = 2;
+ domain->domain.ops = &amdv1_ops;
+
+ ret = pt_iommu_amdv1_init(&domain->amdv1, &cfg, GFP_KERNEL);
if (ret) {
- pdom_id_free(domain->id);
- kfree(domain);
+ amd_iommu_domain_free(&domain->domain);
return ERR_PTR(ret);
}
- domain->domain.geometry.aperture_start = 0;
- domain->domain.geometry.aperture_end = dma_max_address(pgtable);
- domain->domain.geometry.force_aperture = true;
- domain->domain.pgsize_bitmap = domain->iop.pgtbl.cfg.pgsize_bitmap;
+ /*
+ * Narrow the supported page sizes to those selected by the kernel
+ * command line.
+ */
+ domain->domain.pgsize_bitmap &= amd_iommu_pgsize_bitmap;
+ return &domain->domain;
+}
- domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
- domain->domain.ops = iommu->iommu.ops->default_domain_ops;
+static const struct iommu_domain_ops amdv2_ops = {
+ IOMMU_PT_DOMAIN_OPS(x86_64),
+ .iotlb_sync_map = amd_iommu_iotlb_sync_map,
+ .flush_iotlb_all = amd_iommu_flush_iotlb_all,
+ .iotlb_sync = amd_iommu_iotlb_sync,
+ .attach_dev = amd_iommu_attach_device,
+ .free = amd_iommu_domain_free,
+ /*
+ * Note the AMDv2 page table format does not support a Force Coherency
+ * bit, so enforce_cache_coherency should not be set. However VFIO is
+ * not prepared to handle a case where some domains will support
+ * enforcement and others do not. VFIO and iommufd will have to be fixed
+ * before it can fully use the V2 page table. See the comment in
+ * iommufd_hwpt_paging_alloc(). For now leave things as they have
+ * historically been and lie about enforce_cache_coherencey.
+	 * historically been and lie about enforce_cache_coherency.
+ .enforce_cache_coherency = amd_iommu_enforce_cache_coherency,
+};
- if (dirty_tracking)
- domain->domain.dirty_ops = &amd_dirty_ops;
+static struct iommu_domain *amd_iommu_domain_alloc_paging_v2(struct device *dev,
+ u32 flags)
+{
+ struct pt_iommu_x86_64_cfg cfg = {};
+ struct protection_domain *domain;
+ int ret;
+ if (!amd_iommu_v2_pgtbl_supported())
+ return ERR_PTR(-EOPNOTSUPP);
+
+ domain = protection_domain_alloc();
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
+
+ domain->pd_mode = PD_MODE_V2;
+ domain->iommu.nid = dev_to_node(dev);
+
+ cfg.common.features = BIT(PT_FEAT_X86_64_AMD_ENCRYPT_TABLES);
+ if (amd_iommu_np_cache)
+ cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE_NO_GAPS);
+ else
+ cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE);
+
+ /*
+ * The v2 table behaves differently if it is attached to PASID 0 vs a
+ * non-zero PASID. On PASID 0 it has no sign extension and the full
+ * 57/48 bits decode the lower addresses. Otherwise it behaves like a
+ * normal sign extended x86 page table. Since we want the domain to work
+ * in both modes the top bit is removed and PT_FEAT_SIGN_EXTEND is not
+ * set which creates a table that is compatible in both modes.
+ */
+ if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) {
+ cfg.common.hw_max_vasz_lg2 = 56;
+ cfg.top_level = 4;
+ } else {
+ cfg.common.hw_max_vasz_lg2 = 47;
+ cfg.top_level = 3;
+ }
+ cfg.common.hw_max_oasz_lg2 = 52;
+ domain->domain.ops = &amdv2_ops;
+
+ ret = pt_iommu_x86_64_init(&domain->amdv2, &cfg, GFP_KERNEL);
+ if (ret) {
+ amd_iommu_domain_free(&domain->domain);
+ return ERR_PTR(ret);
+ }
return &domain->domain;
}
@@ -2608,15 +2769,27 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
/* Allocate domain with v1 page table for dirty tracking */
if (!amd_iommu_hd_support(iommu))
break;
- return do_iommu_domain_alloc(dev, flags, PD_MODE_V1);
+ return amd_iommu_domain_alloc_paging_v1(dev, flags);
case IOMMU_HWPT_ALLOC_PASID:
/* Allocate domain with v2 page table if IOMMU supports PASID. */
if (!amd_iommu_pasid_supported())
break;
- return do_iommu_domain_alloc(dev, flags, PD_MODE_V2);
- case 0:
+ return amd_iommu_domain_alloc_paging_v2(dev, flags);
+ case 0: {
+ struct iommu_domain *ret;
+
/* If nothing specific is required use the kernel commandline default */
- return do_iommu_domain_alloc(dev, 0, amd_iommu_pgtable);
+ if (amd_iommu_pgtable == PD_MODE_V1) {
+ ret = amd_iommu_domain_alloc_paging_v1(dev, flags);
+ if (ret != ERR_PTR(-EOPNOTSUPP))
+ return ret;
+ return amd_iommu_domain_alloc_paging_v2(dev, flags);
+ }
+ ret = amd_iommu_domain_alloc_paging_v2(dev, flags);
+ if (ret != ERR_PTR(-EOPNOTSUPP))
+ return ret;
+ return amd_iommu_domain_alloc_paging_v1(dev, flags);
+ }
default:
break;
}
@@ -2628,14 +2801,14 @@ void amd_iommu_domain_free(struct iommu_domain *dom)
struct protection_domain *domain = to_pdomain(dom);
WARN_ON(!list_empty(&domain->dev_list));
- if (domain->domain.type & __IOMMU_DOMAIN_PAGING)
- free_io_pgtable_ops(&domain->iop.pgtbl.ops);
+ pt_iommu_deinit(&domain->iommu);
pdom_id_free(domain->id);
kfree(domain);
}
static int blocked_domain_attach_device(struct iommu_domain *domain,
- struct device *dev)
+ struct device *dev,
+ struct iommu_domain *old)
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
@@ -2685,16 +2858,8 @@ void amd_iommu_init_identity_domain(void)
protection_domain_init(&identity_domain);
}
-/* Same as blocked domain except it supports only ops->attach_dev() */
-static struct iommu_domain release_domain = {
- .type = IOMMU_DOMAIN_BLOCKED,
- .ops = &(const struct iommu_domain_ops) {
- .attach_dev = blocked_domain_attach_device,
- }
-};
-
-static int amd_iommu_attach_device(struct iommu_domain *dom,
- struct device *dev)
+static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev,
+ struct iommu_domain *old)
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
struct protection_domain *domain = to_pdomain(dom);
@@ -2734,93 +2899,6 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
return ret;
}
-static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
- unsigned long iova, size_t size)
-{
- struct protection_domain *domain = to_pdomain(dom);
- struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
-
- if (ops->map_pages)
- domain_flush_np_cache(domain, iova, size);
- return 0;
-}
-
-static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova,
- phys_addr_t paddr, size_t pgsize, size_t pgcount,
- int iommu_prot, gfp_t gfp, size_t *mapped)
-{
- struct protection_domain *domain = to_pdomain(dom);
- struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
- int prot = 0;
- int ret = -EINVAL;
-
- if ((domain->pd_mode == PD_MODE_V1) &&
- (domain->iop.mode == PAGE_MODE_NONE))
- return -EINVAL;
-
- if (iommu_prot & IOMMU_READ)
- prot |= IOMMU_PROT_IR;
- if (iommu_prot & IOMMU_WRITE)
- prot |= IOMMU_PROT_IW;
-
- if (ops->map_pages) {
- ret = ops->map_pages(ops, iova, paddr, pgsize,
- pgcount, prot, gfp, mapped);
- }
-
- return ret;
-}
-
-static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain,
- struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t size)
-{
- /*
- * AMD's IOMMU can flush as many pages as necessary in a single flush.
- * Unless we run in a virtual machine, which can be inferred according
- * to whether "non-present cache" is on, it is probably best to prefer
- * (potentially) too extensive TLB flushing (i.e., more misses) over
- * mutliple TLB flushes (i.e., more flushes). For virtual machines the
- * hypervisor needs to synchronize the host IOMMU PTEs with those of
- * the guest, and the trade-off is different: unnecessary TLB flushes
- * should be avoided.
- */
- if (amd_iommu_np_cache &&
- iommu_iotlb_gather_is_disjoint(gather, iova, size))
- iommu_iotlb_sync(domain, gather);
-
- iommu_iotlb_gather_add_range(gather, iova, size);
-}
-
-static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova,
- size_t pgsize, size_t pgcount,
- struct iommu_iotlb_gather *gather)
-{
- struct protection_domain *domain = to_pdomain(dom);
- struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
- size_t r;
-
- if ((domain->pd_mode == PD_MODE_V1) &&
- (domain->iop.mode == PAGE_MODE_NONE))
- return 0;
-
- r = (ops->unmap_pages) ? ops->unmap_pages(ops, iova, pgsize, pgcount, NULL) : 0;
-
- if (r)
- amd_iommu_iotlb_gather_add_page(dom, gather, iova, r);
-
- return r;
-}
-
-static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
- dma_addr_t iova)
-{
- struct protection_domain *domain = to_pdomain(dom);
- struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
-
- return ops->iova_to_phys(ops, iova);
-}
-
static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
{
switch (cap) {
@@ -2887,28 +2965,6 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
return 0;
}
-static int amd_iommu_read_and_clear_dirty(struct iommu_domain *domain,
- unsigned long iova, size_t size,
- unsigned long flags,
- struct iommu_dirty_bitmap *dirty)
-{
- struct protection_domain *pdomain = to_pdomain(domain);
- struct io_pgtable_ops *ops = &pdomain->iop.pgtbl.ops;
- unsigned long lflags;
-
- if (!ops || !ops->read_and_clear_dirty)
- return -EOPNOTSUPP;
-
- spin_lock_irqsave(&pdomain->lock, lflags);
- if (!pdomain->dirty_tracking && dirty->bitmap) {
- spin_unlock_irqrestore(&pdomain->lock, lflags);
- return -EINVAL;
- }
- spin_unlock_irqrestore(&pdomain->lock, lflags);
-
- return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
-}
-
static void amd_iommu_get_resv_regions(struct device *dev,
struct list_head *head)
{
@@ -2978,28 +3034,6 @@ static bool amd_iommu_is_attach_deferred(struct device *dev)
return dev_data->defer_attach;
}
-static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
-{
- struct protection_domain *dom = to_pdomain(domain);
- unsigned long flags;
-
- spin_lock_irqsave(&dom->lock, flags);
- amd_iommu_domain_flush_all(dom);
- spin_unlock_irqrestore(&dom->lock, flags);
-}
-
-static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
- struct iommu_iotlb_gather *gather)
-{
- struct protection_domain *dom = to_pdomain(domain);
- unsigned long flags;
-
- spin_lock_irqsave(&dom->lock, flags);
- amd_iommu_domain_flush_pages(dom, gather->start,
- gather->end - gather->start + 1);
- spin_unlock_irqrestore(&dom->lock, flags);
-}
-
static int amd_iommu_def_domain_type(struct device *dev)
{
struct iommu_dev_data *dev_data;
@@ -3034,15 +3068,10 @@ static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain)
return true;
}
-static const struct iommu_dirty_ops amd_dirty_ops = {
- .set_dirty_tracking = amd_iommu_set_dirty_tracking,
- .read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
-};
-
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.blocked_domain = &blocked_domain,
- .release_domain = &release_domain,
+ .release_domain = &blocked_domain,
.identity_domain = &identity_domain.domain,
.domain_alloc_paging_flags = amd_iommu_domain_alloc_paging_flags,
.domain_alloc_sva = amd_iommu_domain_alloc_sva,
@@ -3053,17 +3082,6 @@ const struct iommu_ops amd_iommu_ops = {
.is_attach_deferred = amd_iommu_is_attach_deferred,
.def_domain_type = amd_iommu_def_domain_type,
.page_response = amd_iommu_page_response,
- .default_domain_ops = &(const struct iommu_domain_ops) {
- .attach_dev = amd_iommu_attach_device,
- .map_pages = amd_iommu_map_pages,
- .unmap_pages = amd_iommu_unmap_pages,
- .iotlb_sync_map = amd_iommu_iotlb_sync_map,
- .iova_to_phys = amd_iommu_iova_to_phys,
- .flush_iotlb_all = amd_iommu_flush_iotlb_all,
- .iotlb_sync = amd_iommu_iotlb_sync,
- .free = amd_iommu_domain_free,
- .enforce_cache_coherency = amd_iommu_enforce_cache_coherency,
- }
};
#ifdef CONFIG_IRQ_REMAP
@@ -3354,7 +3372,7 @@ static int __modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
struct irte_ga *irte)
{
- bool ret;
+ int ret;
ret = __modify_irte_ga(iommu, devid, index, irte);
if (ret)
@@ -4072,3 +4090,5 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
return 0;
}
#endif
+
+MODULE_IMPORT_NS("GENERIC_PT_IOMMU");