From 6c17c7d5936e6af6a5bda9f9de98a5e2ee6e8a6f Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe
Date: Wed, 7 Aug 2024 15:19:20 -0300
Subject: iommu: Allow ATS to work on VFs when the PF uses IDENTITY

PCI ATS has a global Smallest Translation Unit (STU) field that is
located in the PF but shared by all of the VFs.

The expectation is that the STU will be set to the root port's global
STU capability, which is driven by the IO page table configuration of
the iommu HW. Today it becomes set when the iommu driver first enables
ATS.

Thus, to enable ATS on the VF, the PF must have already had the correct
STU programmed, even if ATS is off on the PF. Unfortunately the PF only
programs the STU when the PF enables ATS.

The iommu drivers tend to leave ATS disabled when IDENTITY translation
is being used. Thus we can get into a state where the PF is set up to
use IDENTITY with the DMA API while the VF would like to use VFIO with
a PAGING domain and have ATS turned on. This fails because the PF never
loaded a PAGING domain and so it never set up the STU, and the VF can't
do it.

The simplest solution is to have the iommu driver set the ATS STU when
it probes the device. This way the ATS STU is loaded immediately at
boot time to all PFs and there is no issue when a VF comes to use it.

Add a new call pci_prepare_ats() which should be called by iommu
drivers in their probe_device() op for every PCI device if the iommu
driver supports ATS. This will set up the STU based on whatever page
size capability the iommu HW has.

Signed-off-by: Jason Gunthorpe
Acked-by: Bjorn Helgaas
Reviewed-by: Kevin Tian
Reviewed-by: Lu Baolu
Link: https://lore.kernel.org/r/0-v1-0fb4d2ab6770+7e706-ats_vf_jgg@nvidia.com
Signed-off-by: Joerg Roedel
---
 include/linux/pci-ats.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index df54cd5b15db..0e8b74e63767 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -8,6 +8,7 @@
 /* Address Translation Service */
 bool pci_ats_supported(struct pci_dev *dev);
 int pci_enable_ats(struct pci_dev *dev, int ps);
+int pci_prepare_ats(struct pci_dev *dev, int ps);
 void pci_disable_ats(struct pci_dev *dev);
 int pci_ats_queue_depth(struct pci_dev *dev);
 int pci_ats_page_aligned(struct pci_dev *dev);
@@ -16,6 +17,8 @@ static inline bool pci_ats_supported(struct pci_dev *d)
 { return false; }
 static inline int pci_enable_ats(struct pci_dev *d, int ps)
 { return -ENODEV; }
+static inline int pci_prepare_ats(struct pci_dev *dev, int ps)
+{ return -ENODEV; }
 static inline void pci_disable_ats(struct pci_dev *d) { }
 static inline int pci_ats_queue_depth(struct pci_dev *d)
 { return -ENODEV; }
--
cgit v1.2.3
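The patch above only adds the declaration and a !CONFIG_PCI_ATS stub;
the sketch below shows the intended call site. It is illustrative only:
everything named example_* is invented, and PAGE_SHIFT stands in for
the smallest IO page table page size the real iommu HW supports. The ps
argument follows the same page-shift convention as pci_enable_ats().

#include <linux/err.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>

static struct iommu_device example_iommu;	/* registered at driver init */

static struct iommu_device *example_probe_device(struct device *dev)
{
	/*
	 * Program the ATS Smallest Translation Unit as soon as the
	 * device is probed, so a VF can enable ATS later even while
	 * this PF stays in an IDENTITY domain.
	 */
	if (dev_is_pci(dev))
		pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT);

	/* ... the driver's usual per-device setup would follow ... */
	return &example_iommu;
}

The return value is deliberately ignored in this sketch: a device with
no ATS capability remains fully usable, it simply runs without ATS.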
From 47f218d108950984b24af81f66356ceda380eb74 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe
Date: Thu, 29 Aug 2024 21:06:17 -0300
Subject: iommu/amd: Store the nid in io_pgtable_cfg instead of the domain

We already have memory in the union here that is being wasted in AMD's
case; use it to store the nid.

Putting the nid here further isolates the io_pgtable code from the
struct protection_domain.

Fix up protection_domain_alloc() so that the NID from the device is
provided. At this point dev is never NULL for AMD, so this will now
allocate the first table pointer on the correct NUMA node.

Signed-off-by: Jason Gunthorpe
Reviewed-by: Vasant Hegde
Link: https://lore.kernel.org/r/8-v2-831cdc4d00f3+1a315-amd_iopgtbl_jgg@nvidia.com
Signed-off-by: Joerg Roedel
---
 drivers/iommu/amd/amd_iommu.h       |  2 +-
 drivers/iommu/amd/amd_iommu_types.h |  1 -
 drivers/iommu/amd/io_pgtable.c      |  8 +++++---
 drivers/iommu/amd/io_pgtable_v2.c   |  5 ++---
 drivers/iommu/amd/iommu.c           | 12 +++++++-----
 drivers/iommu/amd/pasid.c           |  2 +-
 include/linux/io-pgtable.h          |  4 ++++
 7 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 8de6d6091454..29e6e71f7f9a 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -45,7 +45,7 @@ extern enum io_pgtable_fmt amd_iommu_pgtable;
 extern int amd_iommu_gpt_level;
 
 /* Protection domain ops */
-struct protection_domain *protection_domain_alloc(unsigned int type);
+struct protection_domain *protection_domain_alloc(unsigned int type, int nid);
 void protection_domain_free(struct protection_domain *domain);
 struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
 						struct mm_struct *mm);
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 7aa4f1983e40..30eb07acb8b1 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -571,7 +571,6 @@ struct protection_domain {
 	struct amd_io_pgtable iop;
 	spinlock_t lock;	/* mostly used to lock the page table*/
 	u16 id;			/* the domain id written to the device table */
-	int nid;		/* Node ID */
 	enum protection_domain_mode pd_mode; /* Track page table type */
 	bool dirty_tracking;	/* dirty tracking is enabled in the domain */
 	unsigned dev_cnt;	/* devices assigned to this domain */
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 53de1146928e..4492a5800b35 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -141,11 +141,12 @@ static bool increase_address_space(struct protection_domain *domain,
 				   unsigned long address,
 				   gfp_t gfp)
 {
+	struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg;
 	unsigned long flags;
 	bool ret = true;
 	u64 *pte;
 
-	pte = iommu_alloc_page_node(domain->nid, gfp);
+	pte = iommu_alloc_page_node(cfg->amd.nid, gfp);
 	if (!pte)
 		return false;
 
@@ -181,6 +182,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
 		      gfp_t gfp,
 		      bool *updated)
 {
+	struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg;
 	int level, end_lvl;
 	u64 *pte, *page;
 
@@ -232,7 +234,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
 
 		if (!IOMMU_PTE_PRESENT(__pte) ||
 		    pte_level == PAGE_MODE_NONE) {
-			page = iommu_alloc_page_node(domain->nid, gfp);
+			page = iommu_alloc_page_node(cfg->amd.nid, gfp);
 
 			if (!page)
 				return NULL;
@@ -559,7 +561,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
 {
 	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
 
-	pgtable->root = iommu_alloc_page(GFP_KERNEL);
+	pgtable->root = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
 	if (!pgtable->root)
 		return NULL;
 	pgtable->mode = PAGE_MODE_3_LEVEL;
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index 45a6bc332639..1e3be8c5312b 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -251,7 +251,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
 
 	while (mapped_size < size) {
 		map_size = get_alloc_page_size(pgsize);
-		pte = v2_alloc_pte(pdom->nid, pdom->iop.pgd,
+		pte = v2_alloc_pte(cfg->amd.nid, pdom->iop.pgd,
 				   iova, map_size, gfp, &updated);
 		if (!pte) {
 			ret = -EINVAL;
@@ -359,10 +359,9 @@ static void v2_free_pgtable(struct io_pgtable *iop)
 
 static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
 {
 	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
-	struct protection_domain *pdom = (struct protection_domain *)cookie;
 	int ias = IOMMU_IN_ADDR_BIT_SIZE;
 
-	pgtable->pgd = iommu_alloc_page_node(pdom->nid, GFP_KERNEL);
+	pgtable->pgd = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
 	if (!pgtable->pgd)
 		return NULL;
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 212cc2801274..44231c3b6b71 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2023,6 +2023,7 @@ static int do_attach(struct iommu_dev_data *dev_data,
 		     struct protection_domain *domain)
 {
 	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
+	struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg;
 	int ret = 0;
 
 	/* Update data structures */
@@ -2030,8 +2031,8 @@ static int do_attach(struct iommu_dev_data *dev_data,
 	list_add(&dev_data->list, &domain->dev_list);
 
 	/* Update NUMA Node ID */
-	if (domain->nid == NUMA_NO_NODE)
-		domain->nid = dev_to_node(dev_data->dev);
+	if (cfg->amd.nid == NUMA_NO_NODE)
+		cfg->amd.nid = dev_to_node(dev_data->dev);
 
 	/* Do reference counting */
 	domain->dev_iommu[iommu->index] += 1;
@@ -2266,7 +2267,7 @@ void protection_domain_free(struct protection_domain *domain)
 	kfree(domain);
 }
 
-struct protection_domain *protection_domain_alloc(unsigned int type)
+struct protection_domain *protection_domain_alloc(unsigned int type, int nid)
 {
 	struct io_pgtable_ops *pgtbl_ops;
 	struct protection_domain *domain;
@@ -2283,7 +2284,7 @@ struct protection_domain *protection_domain_alloc(unsigned int type)
 	spin_lock_init(&domain->lock);
 	INIT_LIST_HEAD(&domain->dev_list);
 	INIT_LIST_HEAD(&domain->dev_data_list);
-	domain->nid = NUMA_NO_NODE;
+	domain->iop.pgtbl.cfg.amd.nid = nid;
 
 	switch (type) {
 	/* No need to allocate io pgtable ops in passthrough mode */
@@ -2360,7 +2361,8 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
 
 	if (dirty_tracking && !amd_iommu_hd_support(iommu))
 		return ERR_PTR(-EOPNOTSUPP);
 
-	domain = protection_domain_alloc(type);
+	domain = protection_domain_alloc(type,
+					 dev ? dev_to_node(dev) : NUMA_NO_NODE);
 	if (!domain)
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c
index a68215f2b3e1..0657b9373be5 100644
--- a/drivers/iommu/amd/pasid.c
+++ b/drivers/iommu/amd/pasid.c
@@ -181,7 +181,7 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
 	struct protection_domain *pdom;
 	int ret;
 
-	pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA);
+	pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA, dev_to_node(dev));
 	if (!pdom)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index f9a81761bfce..b1ecfc3cd5bc 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -171,6 +171,10 @@ struct io_pgtable_cfg {
 			u64	ttbr[4];
 			u32	n_ttbrs;
 		} apple_dart_cfg;
+
+		struct {
+			int nid;
+		} amd;
 	};
 };
 
--
cgit v1.2.3
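For orientation, here is a hedged sketch (not part of the patch) of the
allocation pattern the series converges on: every page table page is
allocated on the NUMA node recorded in the io_pgtable_cfg, which is
seeded from the device at domain allocation time and otherwise filled
in by do_attach() on first attach. iommu_alloc_page_node() is internal
to the AMD driver, so the sketch open-codes an equivalent with
alloc_pages_node(); the example_* names are invented.

#include <linux/gfp.h>
#include <linux/io-pgtable.h>
#include <linux/mm.h>
#include <linux/types.h>

/* Open-coded stand-in for the AMD driver's iommu_alloc_page_node() */
static void *example_alloc_page_node(int nid, gfp_t gfp)
{
	struct page *page = alloc_pages_node(nid, gfp | __GFP_ZERO, 0);

	return page ? page_address(page) : NULL;
}

static u64 *example_alloc_root(struct io_pgtable_cfg *cfg)
{
	/* cfg->amd.nid may still be NUMA_NO_NODE; that is a valid input */
	return example_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
}

Storing the nid in the per-format union costs nothing on AMD, since
that union memory was unused there, and it means the io_pgtable
formats never have to reach back into struct protection_domain.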