From a2ad1b8101a36c927bcbd1317475b9576c352155 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 25 Oct 2024 06:11:51 -0700 Subject: mm/gup: Add folio_add_pins() Export a function that adds pins to an already-pinned huge-page folio. This allows any range of small pages within the folio to be unpinned later. For example, pages pinned via memfd_pin_folios and modified by folio_add_pins could be unpinned via unpin_user_page(s). Link: https://patch.msgid.link/r/1729861919-234514-2-git-send-email-steven.sistare@oracle.com Suggested-by: Jason Gunthorpe Suggested-by: David Hildenbrand Signed-off-by: Steve Sistare Reviewed-by: Jason Gunthorpe Acked-by: David Hildenbrand Signed-off-by: Jason Gunthorpe --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ecf63d2b0582..f9de33ac2add 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2524,6 +2524,7 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, struct folio **folios, unsigned int max_folios, pgoff_t *offset); +int folio_add_pins(struct folio *folio, unsigned int pins); int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); -- cgit v1.2.3 From f4986a72d6e4be78ec0e4ee0e03531474621183f Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 25 Oct 2024 06:11:57 -0700 Subject: iommufd: Add IOMMU_IOAS_MAP_FILE Define the IOMMU_IOAS_MAP_FILE ioctl interface, which allows a user to register memory by passing a memfd plus offset and length. Implement it using the memfd_pin_folios() kAPI. Link: https://patch.msgid.link/r/1729861919-234514-8-git-send-email-steven.sistare@oracle.com Suggested-by: Jason Gunthorpe Signed-off-by: Steve Sistare Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/io_pagetable.c | 36 ++++++++++++++++++++++++- drivers/iommu/iommufd/io_pagetable.h | 2 ++ drivers/iommu/iommufd/ioas.c | 47 +++++++++++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 5 ++++ drivers/iommu/iommufd/main.c | 2 ++ drivers/iommu/iommufd/pages.c | 23 ++++++++++++++++ include/uapi/linux/iommufd.h | 25 ++++++++++++++++++ 7 files changed, 139 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 874ee9ea7efc..8a790e597e12 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -268,7 +268,14 @@ static int iopt_alloc_area_pages(struct io_pagetable *iopt, /* Use the first entry to guess the ideal IOVA alignment */ elm = list_first_entry(pages_list, struct iopt_pages_list, next); - start = elm->start_byte + (uintptr_t)elm->pages->uptr; + switch (elm->pages->type) { + case IOPT_ADDRESS_USER: + start = elm->start_byte + (uintptr_t)elm->pages->uptr; + break; + case IOPT_ADDRESS_FILE: + start = elm->start_byte + elm->pages->start; + break; + } rc = iopt_alloc_iova(iopt, dst_iova, start, length); if (rc) goto out_unlock; @@ -446,6 +453,33 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, uptr - pages->uptr, iommu_prot, flags); } +/** + * iopt_map_file_pages() - Like iopt_map_user_pages, but map a file. + * @ictx: iommufd_ctx the iopt is part of + * @iopt: io_pagetable to act on + * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains + * the chosen iova on output. Otherwise is the iova to map to on input + * @file: file to map + * @start: map file starting at this byte offset + * @length: Number of bytes to map + * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping + * @flags: IOPT_ALLOC_IOVA or zero + */ +int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, + unsigned long *iova, struct file *file, + unsigned long start, unsigned long length, + int iommu_prot, unsigned int flags) +{ + struct iopt_pages *pages; + + pages = iopt_alloc_file_pages(file, start, length, + iommu_prot & IOMMU_WRITE); + if (IS_ERR(pages)) + return PTR_ERR(pages); + return iopt_map_common(ictx, iopt, pages, iova, length, + start - pages->start, iommu_prot, flags); +} + struct iova_bitmap_fn_arg { unsigned long flags; struct io_pagetable *iopt; diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h index 5ac4eedc0be3..9b40b2237932 100644 --- a/drivers/iommu/iommufd/io_pagetable.h +++ b/drivers/iommu/iommufd/io_pagetable.h @@ -220,6 +220,8 @@ struct iopt_pages { struct iopt_pages *iopt_alloc_user_pages(void __user *uptr, unsigned long length, bool writable); +struct iopt_pages *iopt_alloc_file_pages(struct file *file, unsigned long start, + unsigned long length, bool writable); void iopt_release_pages(struct kref *kref); static inline void iopt_put_pages(struct iopt_pages *pages) { diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index 2c4b2bb11e78..c05d33fc3a50 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#include #include #include #include @@ -197,6 +198,52 @@ static int conv_iommu_prot(u32 map_flags) return iommu_prot; } +int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd) +{ + struct iommu_ioas_map_file *cmd = ucmd->cmd; + unsigned long iova = cmd->iova; + struct iommufd_ioas *ioas; + unsigned int flags = 0; + struct file *file; + int rc; + + if (cmd->flags & + ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -EOPNOTSUPP; + + if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) + return -EOVERFLOW; + + if (!(cmd->flags & + (IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE))) + return -EINVAL; + + ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA)) + flags = IOPT_ALLOC_IOVA; + + file = fget(cmd->fd); + if (!file) + return -EBADF; + + rc = iopt_map_file_pages(ucmd->ictx, &ioas->iopt, &iova, file, + cmd->start, cmd->length, + conv_iommu_prot(cmd->flags), flags); + if (rc) + goto out_put; + + cmd->iova = iova; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); +out_put: + iommufd_put_object(ucmd->ictx, &ioas->obj); + fput(file); + return rc; +} + int iommufd_ioas_map(struct iommufd_ucmd *ucmd) { struct iommu_ioas_map *cmd = ucmd->cmd; diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index f1d865e6fab6..8f3c21a664bd 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -69,6 +69,10 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, unsigned long *iova, void __user *uptr, unsigned long length, int iommu_prot, unsigned int flags); +int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, + unsigned long *iova, struct file *file, + unsigned long start, unsigned long length, + int iommu_prot, unsigned int flags); int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list, unsigned long length, unsigned long *dst_iova, int iommu_prot, unsigned int flags); @@ -276,6 +280,7 @@ void iommufd_ioas_destroy(struct iommufd_object *obj); int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd); int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd); int iommufd_ioas_map(struct iommufd_ucmd *ucmd); +int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd); int iommufd_ioas_copy(struct iommufd_ucmd *ucmd); int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd); int iommufd_ioas_option(struct iommufd_ucmd *ucmd); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index b5f5d27ee963..826a2b2be52f 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -378,6 +378,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { struct iommu_ioas_iova_ranges, out_iova_alignment), IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, iova), + IOCTL_OP(IOMMU_IOAS_MAP_FILE, iommufd_ioas_map_file, + struct iommu_ioas_map_file, iova), IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap, length), IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 5f371fa88a4a..2ee6fcd2b551 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -45,6 +45,7 @@ * last_iova + 1 can overflow. An iopt_pages index will always be much less than * ULONG_MAX so last_index + 1 cannot overflow. */ +#include #include #include #include @@ -1340,6 +1341,26 @@ struct iopt_pages *iopt_alloc_user_pages(void __user *uptr, return pages; } +struct iopt_pages *iopt_alloc_file_pages(struct file *file, unsigned long start, + unsigned long length, bool writable) + +{ + struct iopt_pages *pages; + unsigned long start_down = ALIGN_DOWN(start, PAGE_SIZE); + unsigned long end; + + if (length && check_add_overflow(start, length - 1, &end)) + return ERR_PTR(-EOVERFLOW); + + pages = iopt_alloc_pages(start - start_down, length, writable); + if (IS_ERR(pages)) + return pages; + pages->file = get_file(file); + pages->start = start_down; + pages->type = IOPT_ADDRESS_FILE; + return pages; +} + void iopt_release_pages(struct kref *kref) { struct iopt_pages *pages = container_of(kref, struct iopt_pages, kref); @@ -1352,6 +1373,8 @@ void iopt_release_pages(struct kref *kref) mutex_destroy(&pages->mutex); put_task_struct(pages->source_task); free_uid(pages->source_user); + if (pages->type == IOPT_ADDRESS_FILE) + fput(pages->file); kfree(pages); } diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 72010f71c5e4..41b1a01e9293 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -51,6 +51,7 @@ enum { IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c, IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, + IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, }; /** @@ -213,6 +214,30 @@ struct iommu_ioas_map { }; #define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP) +/** + * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE) + * @size: sizeof(struct iommu_ioas_map_file) + * @flags: same as for iommu_ioas_map + * @ioas_id: same as for iommu_ioas_map + * @fd: the memfd to map + * @start: byte offset from start of file to map from + * @length: same as for iommu_ioas_map + * @iova: same as for iommu_ioas_map + * + * Set an IOVA mapping from a memfd file. All other arguments and semantics + * match those of IOMMU_IOAS_MAP. + */ +struct iommu_ioas_map_file { + __u32 size; + __u32 flags; + __u32 ioas_id; + __s32 fd; + __aligned_u64 start; + __aligned_u64 length; + __aligned_u64 iova; +}; +#define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE) + /** * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY) * @size: sizeof(struct iommu_ioas_copy) -- cgit v1.2.3 From 35890f85573c2ebbbf3491dc66f7ee2ad63055af Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:45 -0300 Subject: vfio: Remove VFIO_TYPE1_NESTING_IOMMU This control causes the ARM SMMU drivers to choose a stage 2 implementation for the IO pagetable (vs the stage 1 usual default), however this choice has no significant visible impact to the VFIO user. Further qemu never implemented this and no other userspace user is known. The original description in commit f5c9ecebaf2a ("vfio/iommu_type1: add new VFIO_TYPE1_NESTING_IOMMU IOMMU type") suggested this was to "provide SMMU translation services to the guest operating system" however the rest of the API to set the guest table pointer for the stage 1 and manage invalidation was never completed, or at least never upstreamed, rendering this part useless dead code. Upstream has now settled on iommufd as the uAPI for controlling nested translation. Choosing the stage 2 implementation should be done by through the IOMMU_HWPT_ALLOC_NEST_PARENT flag during domain allocation. Remove VFIO_TYPE1_NESTING_IOMMU and everything under it including the enable_nesting iommu_domain_op. Just in-case there is some userspace using this continue to treat requesting it as a NOP, but do not advertise support any more. Acked-by: Alex Williamson Reviewed-by: Mostafa Saleh Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Tested-by: Nicolin Chen Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 16 ---------------- drivers/iommu/arm/arm-smmu/arm-smmu.c | 16 ---------------- drivers/iommu/iommu.c | 10 ---------- drivers/iommu/iommufd/vfio_compat.c | 7 +------ drivers/vfio/vfio_iommu_type1.c | 12 +----------- include/linux/iommu.h | 3 --- include/uapi/linux/vfio.h | 2 +- 7 files changed, 3 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 737c5b882355..acf250aeb18b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3378,21 +3378,6 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) return group; } -static int arm_smmu_enable_nesting(struct iommu_domain *domain) -{ - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - int ret = 0; - - mutex_lock(&smmu_domain->init_mutex); - if (smmu_domain->smmu) - ret = -EPERM; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_S2; - mutex_unlock(&smmu_domain->init_mutex); - - return ret; -} - static int arm_smmu_of_xlate(struct device *dev, const struct of_phandle_args *args) { @@ -3514,7 +3499,6 @@ static struct iommu_ops arm_smmu_ops = { .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, - .enable_nesting = arm_smmu_enable_nesting, .free = arm_smmu_domain_free_paging, } }; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 8321962b3714..12b173eec454 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1558,21 +1558,6 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) return group; } -static int arm_smmu_enable_nesting(struct iommu_domain *domain) -{ - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - int ret = 0; - - mutex_lock(&smmu_domain->init_mutex); - if (smmu_domain->smmu) - ret = -EPERM; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; - mutex_unlock(&smmu_domain->init_mutex); - - return ret; -} - static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks) { @@ -1656,7 +1641,6 @@ static struct iommu_ops arm_smmu_ops = { .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, - .enable_nesting = arm_smmu_enable_nesting, .set_pgtable_quirks = arm_smmu_set_pgtable_quirks, .free = arm_smmu_domain_free, } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 83c8e617a2c5..dbd70d5a4702 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2723,16 +2723,6 @@ static int __init iommu_init(void) } core_initcall(iommu_init); -int iommu_enable_nesting(struct iommu_domain *domain) -{ - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - if (!domain->ops->enable_nesting) - return -EINVAL; - return domain->ops->enable_nesting(domain); -} -EXPORT_SYMBOL_GPL(iommu_enable_nesting); - int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirk) { diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index a3ad5f0b6c59..514aacd64009 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -291,12 +291,7 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx, case VFIO_DMA_CC_IOMMU: return iommufd_vfio_cc_iommu(ictx); - /* - * This is obsolete, and to be removed from VFIO. It was an incomplete - * idea that got merged. - * https://lore.kernel.org/kvm/0-v1-0093c9b0e345+19-vfio_no_nesting_jgg@nvidia.com/ - */ - case VFIO_TYPE1_NESTING_IOMMU: + case __VFIO_RESERVED_TYPE1_NESTING_IOMMU: return 0; /* diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index bf391b40e576..50ebc9593c9d 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -72,7 +72,6 @@ struct vfio_iommu { uint64_t pgsize_bitmap; uint64_t num_non_pinned_groups; bool v2; - bool nesting; bool dirty_page_tracking; struct list_head emulated_iommu_groups; }; @@ -2195,12 +2194,6 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_free_domain; } - if (iommu->nesting) { - ret = iommu_enable_nesting(domain->domain); - if (ret) - goto out_domain; - } - ret = iommu_attach_group(domain->domain, group->iommu_group); if (ret) goto out_domain; @@ -2541,9 +2534,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) switch (arg) { case VFIO_TYPE1_IOMMU: break; - case VFIO_TYPE1_NESTING_IOMMU: - iommu->nesting = true; - fallthrough; + case __VFIO_RESERVED_TYPE1_NESTING_IOMMU: case VFIO_TYPE1v2_IOMMU: iommu->v2 = true; break; @@ -2638,7 +2629,6 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, switch (arg) { case VFIO_TYPE1_IOMMU: case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_NESTING_IOMMU: case VFIO_UNMAP_ALL: return 1; case VFIO_UPDATE_VADDR: diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bd722f473635..c88d18d2c928 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -635,7 +635,6 @@ struct iommu_ops { * @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE, * including no-snoop TLPs on PCIe or other platform * specific mechanisms. - * @enable_nesting: Enable nesting * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @free: Release the domain after use. */ @@ -663,7 +662,6 @@ struct iommu_domain_ops { dma_addr_t iova); bool (*enforce_cache_coherency)(struct iommu_domain *domain); - int (*enable_nesting)(struct iommu_domain *domain); int (*set_pgtable_quirks)(struct iommu_domain *domain, unsigned long quirks); @@ -844,7 +842,6 @@ extern void iommu_group_put(struct iommu_group *group); extern int iommu_group_id(struct iommu_group *group); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); -int iommu_enable_nesting(struct iommu_domain *domain); int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 2b68e6cdf190..c8dbf8219c4f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -35,7 +35,7 @@ #define VFIO_EEH 5 /* Two-stage IOMMU */ -#define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ +#define __VFIO_RESERVED_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ #define VFIO_SPAPR_TCE_v2_IOMMU 7 -- cgit v1.2.3 From 1b8655bb8d977ca110436c1cd0ca957c19670c1e Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:46 -0300 Subject: ACPICA: IORT: Update for revision E.f ACPICA commit c4f5c083d24df9ddd71d5782c0988408cf0fc1ab The IORT spec, Issue E.f (April 2024), adds a new CANWBS bit to the Memory Access Flag field in the Memory Access Properties table, mainly for a PCI Root Complex. This CANWBS defines the coherency of memory accesses to be not marked IOWB cacheable/shareable. Its value further implies the coherency impact from a pair of mismatched memory attributes (e.g. in a nested translation case): 0x0: Use of mismatched memory attributes for accesses made by this device may lead to a loss of coherency. 0x1: Coherency of accesses made by this device to locations in Conventional memory are ensured as follows, even if the memory attributes for the accesses presented by the device or provided by the SMMU are different from Inner and Outer Write-back cacheable, Shareable. Link: https://github.com/acpica/acpica/commit/c4f5c083 Acked-by: Rafael J. Wysocki Signed-off-by: Nicolin Chen Acked-by: Hanjun Guo Tested-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Will Deacon --- include/acpi/actbl2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index d3858eebc255..2e917a8f8bca 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -453,7 +453,7 @@ struct acpi_table_ccel { * IORT - IO Remapping Table * * Conforms to "IO Remapping Table System Software on ARM Platforms", - * Document number: ARM DEN 0049E.e, Sep 2022 + * Document number: ARM DEN 0049E.f, Apr 2024 * ******************************************************************************/ @@ -524,6 +524,7 @@ struct acpi_iort_memory_access { #define ACPI_IORT_MF_COHERENCY (1) #define ACPI_IORT_MF_ATTRIBUTES (1<<1) +#define ACPI_IORT_MF_CANWBS (1<<2) /* * IORT node specific subtables -- cgit v1.2.3 From 807404d66fcf898d4bcc6a3e3edb07ffd5b88400 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:47 -0300 Subject: ACPI/IORT: Support CANWBS memory access flag The IORT spec, Issue E.f (April 2024), adds a new CANWBS bit to the Memory Access Flag field in the Memory Access Properties table, mainly for a PCI Root Complex. This CANWBS defines the coherency of memory accesses to be not marked IOWB cacheable/shareable. Its value further implies the coherency impact from a pair of mismatched memory attributes (e.g. in a nested translation case): 0x0: Use of mismatched memory attributes for accesses made by this device may lead to a loss of coherency. 0x1: Coherency of accesses made by this device to locations in Conventional memory are ensured as follows, even if the memory attributes for the accesses presented by the device or provided by the SMMU are different from Inner and Outer Write-back cacheable, Shareable. Note that the loss of coherency on a CANWBS-unsupported HW typically could occur to an SMMU that doesn't implement the S2FWB feature where additional cache flush operations would be required to prevent that from happening. Add a new ACPI_IORT_MF_CANWBS flag and set IOMMU_FWSPEC_PCI_RC_CANWBS upon the presence of this new flag. CANWBS and S2FWB are similar features, in that they both guarantee the VM can not violate coherency, however S2FWB can be bypassed by PCI No Snoop TLPs, while CANWBS cannot. Thus CANWBS meets the requirements to set IOMMU_CAP_ENFORCE_CACHE_COHERENCY. Architecturally ARM has expected that VFIO would disable No Snoop through PCI Config space, if this is done then the two would have the same protections. Tested-by: Nicolin Chen Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Acked-by: Hanjun Guo Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 13 +++++++++++++ include/linux/iommu.h | 2 ++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 4c745a26226b..1f7e4c691d9e 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1218,6 +1218,17 @@ static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node) return pci_rc->ats_attribute & ACPI_IORT_ATS_SUPPORTED; } +static bool iort_pci_rc_supports_canwbs(struct acpi_iort_node *node) +{ + struct acpi_iort_memory_access *memory_access; + struct acpi_iort_root_complex *pci_rc; + + pci_rc = (struct acpi_iort_root_complex *)node->node_data; + memory_access = + (struct acpi_iort_memory_access *)&pci_rc->memory_properties; + return memory_access->memory_flags & ACPI_IORT_MF_CANWBS; +} + static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, u32 streamid) { @@ -1335,6 +1346,8 @@ int iort_iommu_configure_id(struct device *dev, const u32 *id_in) fwspec = dev_iommu_fwspec_get(dev); if (fwspec && iort_pci_rc_supports_ats(node)) fwspec->flags |= IOMMU_FWSPEC_PCI_RC_ATS; + if (fwspec && iort_pci_rc_supports_canwbs(node)) + fwspec->flags |= IOMMU_FWSPEC_PCI_RC_CANWBS; } else { node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT, iort_match_node_callback, dev); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c88d18d2c928..4ad9b9ec6c9b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -991,6 +991,8 @@ struct iommu_fwspec { /* ATS is supported */ #define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0) +/* CANWBS is supported */ +#define IOMMU_FWSPEC_PCI_RC_CANWBS (1 << 1) /* * An iommu attach handle represents a relationship between an iommu domain -- cgit v1.2.3 From 6912ec91828b8d7f21b393befad1c36dadbcd751 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:49 -0300 Subject: iommu/arm-smmu-v3: Support IOMMU_GET_HW_INFO via struct arm_smmu_hw_info For virtualization cases the IDR/IIDR/AIDR values of the actual SMMU instance need to be available to the VMM so it can construct an appropriate vSMMUv3 that reflects the correct HW capabilities. For userspace page tables these values are required to constrain the valid values within the CD table and the IOPTEs. The kernel does not sanitize these values. If building a VMM then userspace is required to only forward bits into a VM that it knows it can implement. Some bits will also require a VMM to detect if appropriate kernel support is available such as for ATS and BTM. Start a new file and kconfig for the advanced iommufd support. This lets it be compiled out for kernels that are not intended to support virtualization, and allows distros to leave it disabled until they are shipping a matching qemu too. Tested-by: Nicolin Chen Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/Kconfig | 9 ++++++ drivers/iommu/arm/arm-smmu-v3/Makefile | 1 + .../iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 31 +++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 + drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 9 ++++++ include/uapi/linux/iommufd.h | 35 ++++++++++++++++++++++ 6 files changed, 86 insertions(+) create mode 100644 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c (limited to 'include') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index b3aa1f5d5321..0c9bceb1653d 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -415,6 +415,15 @@ config ARM_SMMU_V3_SVA Say Y here if your system supports SVA extensions such as PCIe PASID and PRI. +config ARM_SMMU_V3_IOMMUFD + bool "Enable IOMMUFD features for ARM SMMUv3 (EXPERIMENTAL)" + depends on IOMMUFD + help + Support for IOMMUFD features intended to support virtual machines + with accelerated virtual IOMMUs. + + Say Y here if you are doing development and testing on this feature. + config ARM_SMMU_V3_KUNIT_TEST tristate "KUnit tests for arm-smmu-v3 driver" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile index dc98c88b48c8..493a659cc66b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/Makefile +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o arm_smmu_v3-y := arm-smmu-v3.o +arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_IOMMUFD) += arm-smmu-v3-iommufd.o arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o arm_smmu_v3-$(CONFIG_TEGRA241_CMDQV) += tegra241-cmdqv.o diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c new file mode 100644 index 000000000000..3d2671031c9b --- /dev/null +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + */ + +#include + +#include "arm-smmu-v3.h" + +void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type) +{ + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct iommu_hw_info_arm_smmuv3 *info; + u32 __iomem *base_idr; + unsigned int i; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + base_idr = master->smmu->base + ARM_SMMU_IDR0; + for (i = 0; i <= 5; i++) + info->idr[i] = readl_relaxed(base_idr + i); + info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR); + info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR); + + *length = sizeof(*info); + *type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3; + + return info; +} diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 38725810c14e..996774d461ae 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3506,6 +3506,7 @@ static struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, + .hw_info = arm_smmu_hw_info, .domain_alloc_paging = arm_smmu_domain_alloc_paging, .domain_alloc_sva = arm_smmu_sva_domain_alloc, .domain_alloc_user = arm_smmu_domain_alloc_user, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 06e3d88932df..66261fd5bfb2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -81,6 +81,8 @@ struct arm_smmu_device; #define IIDR_REVISION GENMASK(15, 12) #define IIDR_IMPLEMENTER GENMASK(11, 0) +#define ARM_SMMU_AIDR 0x1C + #define ARM_SMMU_CR0 0x20 #define CR0_ATSCHK (1 << 4) #define CR0_CMDQEN (1 << 3) @@ -956,4 +958,11 @@ tegra241_cmdqv_probe(struct arm_smmu_device *smmu) return ERR_PTR(-ENODEV); } #endif /* CONFIG_TEGRA241_CMDQV */ + +#if IS_ENABLED(CONFIG_ARM_SMMU_V3_IOMMUFD) +void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type); +#else +#define arm_smmu_hw_info NULL +#endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */ + #endif /* _ARM_SMMU_V3_H */ diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 72010f71c5e4..b5c94fecb94c 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -484,15 +484,50 @@ struct iommu_hw_info_vtd { __aligned_u64 ecap_reg; }; +/** + * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information + * (IOMMU_HW_INFO_TYPE_ARM_SMMUV3) + * + * @flags: Must be set to 0 + * @__reserved: Must be 0 + * @idr: Implemented features for ARM SMMU Non-secure programming interface + * @iidr: Information about the implementation and implementer of ARM SMMU, + * and architecture version supported + * @aidr: ARM SMMU architecture version + * + * For the details of @idr, @iidr and @aidr, please refer to the chapters + * from 6.3.1 to 6.3.6 in the SMMUv3 Spec. + * + * User space should read the underlying ARM SMMUv3 hardware information for + * the list of supported features. + * + * Note that these values reflect the raw HW capability, without any insight if + * any required kernel driver support is present. Bits may be set indicating the + * HW has functionality that is lacking kernel software support, such as BTM. If + * a VMM is using this information to construct emulated copies of these + * registers it should only forward bits that it knows it can support. + * + * In future, presence of required kernel support will be indicated in flags. + */ +struct iommu_hw_info_arm_smmuv3 { + __u32 flags; + __u32 __reserved; + __u32 idr[6]; + __u32 iidr; + __u32 aidr; +}; + /** * enum iommu_hw_info_type - IOMMU Hardware Info Types * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware * info * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type + * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type */ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_NONE = 0, IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, + IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2, }; /** -- cgit v1.2.3 From d1b3dad9de7962f7bea861f27e352bac17b68bed Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:17 -0800 Subject: iommufd: Move struct iommufd_object to public iommufd header Prepare for an embedded structure design for driver-level iommufd_viommu objects: // include/linux/iommufd.h struct iommufd_viommu { struct iommufd_object obj; .... }; // Some IOMMU driver struct iommu_driver_viommu { struct iommufd_viommu core; .... }; It has to expose struct iommufd_object and enum iommufd_object_type from the core-level private header to the public iommufd header. Link: https://patch.msgid.link/r/54a43b0768089d690104530754f499ca05ce0074.1730836219.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_private.h | 25 +------------------------ include/linux/iommufd.h | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 8f3c21a664bd..94cfcab7e9de 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -5,8 +5,8 @@ #define __IOMMUFD_PRIVATE_H #include +#include #include -#include #include #include #include @@ -126,29 +126,6 @@ static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd, return 0; } -enum iommufd_object_type { - IOMMUFD_OBJ_NONE, - IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, - IOMMUFD_OBJ_DEVICE, - IOMMUFD_OBJ_HWPT_PAGING, - IOMMUFD_OBJ_HWPT_NESTED, - IOMMUFD_OBJ_IOAS, - IOMMUFD_OBJ_ACCESS, - IOMMUFD_OBJ_FAULT, -#ifdef CONFIG_IOMMUFD_TEST - IOMMUFD_OBJ_SELFTEST, -#endif - IOMMUFD_OBJ_MAX, -}; - -/* Base struct for all objects with a userspace ID handle. */ -struct iommufd_object { - refcount_t shortterm_users; - refcount_t users; - enum iommufd_object_type type; - unsigned int id; -}; - static inline bool iommufd_lock_obj(struct iommufd_object *obj) { if (!refcount_inc_not_zero(&obj->users)) diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 30f832a60ccb..22948dd03d67 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -8,6 +8,7 @@ #include #include +#include #include struct device; @@ -18,6 +19,29 @@ struct iommufd_ctx; struct iommufd_device; struct page; +enum iommufd_object_type { + IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_DEVICE, + IOMMUFD_OBJ_HWPT_PAGING, + IOMMUFD_OBJ_HWPT_NESTED, + IOMMUFD_OBJ_IOAS, + IOMMUFD_OBJ_ACCESS, + IOMMUFD_OBJ_FAULT, +#ifdef CONFIG_IOMMUFD_TEST + IOMMUFD_OBJ_SELFTEST, +#endif + IOMMUFD_OBJ_MAX, +}; + +/* Base struct for all objects with a userspace ID handle. */ +struct iommufd_object { + refcount_t shortterm_users; + refcount_t users; + enum iommufd_object_type type; + unsigned int id; +}; + struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); void iommufd_device_unbind(struct iommufd_device *idev); -- cgit v1.2.3 From 7d4f46c2372d5a850e28f63001013de50843b6e6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:18 -0800 Subject: iommufd: Move _iommufd_object_alloc helper to a sharable file The following patch will add a new vIOMMU allocator that will require this _iommufd_object_alloc to be sharable with IOMMU drivers (and iommufd too). Add a new driver.c file that will be built with CONFIG_IOMMUFD_DRIVER_CORE selected by CONFIG_IOMMUFD, and put the CONFIG_DRIVER under that remaining to be selectable for drivers to build the existing iova_bitmap.c file. Link: https://patch.msgid.link/r/2f4f6e116dc49ffb67ff6c5e8a7a8e789ab9e98e.1730836219.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/Kconfig | 4 ++++ drivers/iommu/iommufd/Makefile | 3 +++ drivers/iommu/iommufd/driver.c | 40 +++++++++++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 4 ---- drivers/iommu/iommufd/main.c | 32 -------------------------- include/linux/iommufd.h | 13 +++++++++++ 6 files changed, 60 insertions(+), 36 deletions(-) create mode 100644 drivers/iommu/iommufd/driver.c (limited to 'include') diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index 76656fe0470d..0a07f9449fd9 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -1,4 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only +config IOMMUFD_DRIVER_CORE + tristate + default (IOMMUFD_DRIVER || IOMMUFD) if IOMMUFD!=n + config IOMMUFD tristate "IOMMU Userspace API" select INTERVAL_TREE diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index cf4605962bea..83df9077063e 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -13,3 +13,6 @@ iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o obj-$(CONFIG_IOMMUFD) += iommufd.o obj-$(CONFIG_IOMMUFD_DRIVER) += iova_bitmap.o + +iommufd_driver-y := driver.o +obj-$(CONFIG_IOMMUFD_DRIVER_CORE) += iommufd_driver.o diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c new file mode 100644 index 000000000000..2bc47d92a0ab --- /dev/null +++ b/drivers/iommu/iommufd/driver.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + */ +#include "iommufd_private.h" + +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type) +{ + struct iommufd_object *obj; + int rc; + + obj = kzalloc(size, GFP_KERNEL_ACCOUNT); + if (!obj) + return ERR_PTR(-ENOMEM); + obj->type = type; + /* Starts out bias'd by 1 until it is removed from the xarray */ + refcount_set(&obj->shortterm_users, 1); + refcount_set(&obj->users, 1); + + /* + * Reserve an ID in the xarray but do not publish the pointer yet since + * the caller hasn't initialized it yet. Once the pointer is published + * in the xarray and visible to other threads we can't reliably destroy + * it anymore, so the caller must complete all errorable operations + * before calling iommufd_object_finalize(). + */ + rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b, + GFP_KERNEL_ACCOUNT); + if (rc) + goto out_free; + return obj; +out_free: + kfree(obj); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_NS_GPL(_iommufd_object_alloc, IOMMUFD); + +MODULE_DESCRIPTION("iommufd code shared with builtin modules"); +MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 94cfcab7e9de..be347f726fda 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -206,10 +206,6 @@ iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, iommufd_object_remove(ictx, obj, obj->id, 0); } -struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, - size_t size, - enum iommufd_object_type type); - #define __iommufd_object_alloc(ictx, ptr, type, obj) \ container_of(_iommufd_object_alloc( \ ictx, \ diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 826a2b2be52f..3c32b440471b 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -29,38 +29,6 @@ struct iommufd_object_ops { static const struct iommufd_object_ops iommufd_object_ops[]; static struct miscdevice vfio_misc_dev; -struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, - size_t size, - enum iommufd_object_type type) -{ - struct iommufd_object *obj; - int rc; - - obj = kzalloc(size, GFP_KERNEL_ACCOUNT); - if (!obj) - return ERR_PTR(-ENOMEM); - obj->type = type; - /* Starts out bias'd by 1 until it is removed from the xarray */ - refcount_set(&obj->shortterm_users, 1); - refcount_set(&obj->users, 1); - - /* - * Reserve an ID in the xarray but do not publish the pointer yet since - * the caller hasn't initialized it yet. Once the pointer is published - * in the xarray and visible to other threads we can't reliably destroy - * it anymore, so the caller must complete all errorable operations - * before calling iommufd_object_finalize(). - */ - rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, - xa_limit_31b, GFP_KERNEL_ACCOUNT); - if (rc) - goto out_free; - return obj; -out_free: - kfree(obj); - return ERR_PTR(rc); -} - /* * Allow concurrent access to the object. * diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 22948dd03d67..94522d4029ca 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -135,4 +135,17 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) return -EOPNOTSUPP; } #endif /* CONFIG_IOMMUFD */ + +#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type); +#else /* !CONFIG_IOMMUFD_DRIVER_CORE */ +static inline struct iommufd_object * +_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, + enum iommufd_object_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif /* CONFIG_IOMMUFD_DRIVER_CORE */ #endif -- cgit v1.2.3 From 6b22d562fcd6e3d1cc1c265b0596840946d16a09 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:19 -0800 Subject: iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct Add a new IOMMUFD_OBJ_VIOMMU with an iommufd_viommu structure to represent a slice of physical IOMMU device passed to or shared with a user space VM. This slice, now a vIOMMU object, is a group of virtualization resources of a physical IOMMU's, such as: - Security namespace for guest owned ID, e.g. guest-controlled cache tags - Non-device-affiliated event reporting, e.g. invalidation queue errors - Access to a sharable nesting parent pagetable across physical IOMMUs - Virtualization of various platforms IDs, e.g. RIDs and others - Delivery of paravirtualized invalidation - Direct assigned invalidation queues - Direct assigned interrupts Add a new viommu_alloc op in iommu_ops, for drivers to allocate their own vIOMMU structures. And this allocation also needs a free(), so add struct iommufd_viommu_ops. To simplify a vIOMMU allocation, provide a iommufd_viommu_alloc() helper. It's suggested that a driver should embed a core-level viommu structure in its driver-level viommu struct and call the iommufd_viommu_alloc() helper, meanwhile the driver can also implement a viommu ops: struct my_driver_viommu { struct iommufd_viommu core; /* driver-owned properties/features */ .... }; static const struct iommufd_viommu_ops my_driver_viommu_ops = { .free = my_driver_viommu_free, /* future ops for virtualization features */ .... }; static struct iommufd_viommu my_driver_viommu_alloc(...) { struct my_driver_viommu *my_viommu = iommufd_viommu_alloc(ictx, my_driver_viommu, core, my_driver_viommu_ops); /* Init my_viommu and related HW feature */ .... return &my_viommu->core; } static struct iommu_domain_ops my_driver_domain_ops = { .... .viommu_alloc = my_driver_viommu_alloc, }; Link: https://patch.msgid.link/r/64685e2b79dea0f1dc56f6ede04809b72d578935.1730836219.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 14 ++++++++++++++ include/linux/iommufd.h | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bd722f473635..2574fc8abaf2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -42,6 +42,8 @@ struct notifier_block; struct iommu_sva; struct iommu_dma_cookie; struct iommu_fault_param; +struct iommufd_ctx; +struct iommufd_viommu; #define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ #define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ @@ -542,6 +544,14 @@ static inline int __iommu_copy_struct_from_user_array( * @remove_dev_pasid: Remove any translation configurations of a specific * pasid, so that any DMA transactions with this pasid * will be blocked by the hardware. + * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind + * the @dev, as the set of virtualization resources shared/passed + * to user space IOMMU instance. And associate it with a nesting + * @parent_domain. The @viommu_type must be defined in the header + * include/uapi/linux/iommufd.h + * It is required to call iommufd_viommu_alloc() helper for + * a bundled allocation of the core and the driver structures, + * using the given @ictx pointer. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops * @identity_domain: An always available, always attachable identity @@ -591,6 +601,10 @@ struct iommu_ops { void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid, struct iommu_domain *domain); + struct iommufd_viommu *(*viommu_alloc)( + struct device *dev, struct iommu_domain *parent_domain, + struct iommufd_ctx *ictx, unsigned int viommu_type); + const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 94522d4029ca..4fc2ce332f10 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -17,6 +17,7 @@ struct iommu_group; struct iommufd_access; struct iommufd_ctx; struct iommufd_device; +struct iommufd_viommu_ops; struct page; enum iommufd_object_type { @@ -28,6 +29,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_IOAS, IOMMUFD_OBJ_ACCESS, IOMMUFD_OBJ_FAULT, + IOMMUFD_OBJ_VIOMMU, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -78,6 +80,26 @@ void iommufd_access_detach(struct iommufd_access *access); void iommufd_ctx_get(struct iommufd_ctx *ictx); +struct iommufd_viommu { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct iommu_device *iommu_dev; + struct iommufd_hwpt_paging *hwpt; + + const struct iommufd_viommu_ops *ops; + + unsigned int type; +}; + +/** + * struct iommufd_viommu_ops - vIOMMU specific operations + * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory + * of the vIOMMU will be free-ed by iommufd core after calling this op + */ +struct iommufd_viommu_ops { + void (*destroy)(struct iommufd_viommu *viommu); +}; + #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); struct iommufd_ctx *iommufd_ctx_from_fd(int fd); @@ -148,4 +170,22 @@ _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, return ERR_PTR(-EOPNOTSUPP); } #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ + +/* + * Helpers for IOMMU driver to allocate driver structures that will be freed by + * the iommufd core. The free op will be called prior to freeing the memory. + */ +#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops) \ + ({ \ + drv_struct *ret; \ + \ + static_assert(__same_type(struct iommufd_viommu, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member.obj) == 0); \ + ret = (drv_struct *)_iommufd_object_alloc( \ + ictx, sizeof(drv_struct), IOMMUFD_OBJ_VIOMMU); \ + if (!IS_ERR(ret)) \ + ret->member.ops = viommu_ops; \ + ret; \ + }) #endif -- cgit v1.2.3 From 4db97c21ed07a7d4081ed9820599fa36857083d6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:21 -0800 Subject: iommufd/viommu: Add IOMMU_VIOMMU_ALLOC ioctl Add a new ioctl for user space to do a vIOMMU allocation. It must be based on a nesting parent HWPT, so take its refcount. IOMMU driver wanting to support vIOMMUs must define its IOMMU_VIOMMU_TYPE_ in the uAPI header and implement a viommu_alloc op in its iommu_ops. Link: https://patch.msgid.link/r/dc2b8ba9ac935007beff07c1761c31cd097ed780.1730836219.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/Makefile | 3 +- drivers/iommu/iommufd/iommufd_private.h | 3 ++ drivers/iommu/iommufd/main.c | 6 +++ drivers/iommu/iommufd/viommu.c | 81 +++++++++++++++++++++++++++++++++ include/uapi/linux/iommufd.h | 40 ++++++++++++++++ 5 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 drivers/iommu/iommufd/viommu.c (limited to 'include') diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index 83df9077063e..cb784da6cddc 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -7,7 +7,8 @@ iommufd-y := \ ioas.o \ main.o \ pages.o \ - vfio_compat.o + vfio_compat.o \ + viommu.o iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index be347f726fda..a8104d9d4cef 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -506,6 +506,9 @@ static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev, return iommu_group_replace_domain(idev->igroup->group, hwpt->domain); } +int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd); +void iommufd_viommu_destroy(struct iommufd_object *obj); + #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); void iommufd_selftest_destroy(struct iommufd_object *obj); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 30e6c2af3b45..cc514f9bc3e6 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -307,6 +307,7 @@ union ucmd_buffer { struct iommu_ioas_unmap unmap; struct iommu_option option; struct iommu_vfio_ioas vfio_ioas; + struct iommu_viommu_alloc viommu; #ifdef CONFIG_IOMMUFD_TEST struct iommu_test_cmd test; #endif @@ -360,6 +361,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { val64), IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas, __reserved), + IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl, + struct iommu_viommu_alloc, out_viommu_id), #ifdef CONFIG_IOMMUFD_TEST IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last), #endif @@ -495,6 +498,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_FAULT] = { .destroy = iommufd_fault_destroy, }, + [IOMMUFD_OBJ_VIOMMU] = { + .destroy = iommufd_viommu_destroy, + }, #ifdef CONFIG_IOMMUFD_TEST [IOMMUFD_OBJ_SELFTEST] = { .destroy = iommufd_selftest_destroy, diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c new file mode 100644 index 000000000000..888239b78667 --- /dev/null +++ b/drivers/iommu/iommufd/viommu.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + */ +#include "iommufd_private.h" + +void iommufd_viommu_destroy(struct iommufd_object *obj) +{ + struct iommufd_viommu *viommu = + container_of(obj, struct iommufd_viommu, obj); + + if (viommu->ops && viommu->ops->destroy) + viommu->ops->destroy(viommu); + refcount_dec(&viommu->hwpt->common.obj.users); +} + +int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) +{ + struct iommu_viommu_alloc *cmd = ucmd->cmd; + struct iommufd_hwpt_paging *hwpt_paging; + struct iommufd_viommu *viommu; + struct iommufd_device *idev; + const struct iommu_ops *ops; + int rc; + + if (cmd->flags || cmd->type == IOMMU_VIOMMU_TYPE_DEFAULT) + return -EOPNOTSUPP; + + idev = iommufd_get_device(ucmd, cmd->dev_id); + if (IS_ERR(idev)) + return PTR_ERR(idev); + + ops = dev_iommu_ops(idev->dev); + if (!ops->viommu_alloc) { + rc = -EOPNOTSUPP; + goto out_put_idev; + } + + hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id); + if (IS_ERR(hwpt_paging)) { + rc = PTR_ERR(hwpt_paging); + goto out_put_idev; + } + + if (!hwpt_paging->nest_parent) { + rc = -EINVAL; + goto out_put_hwpt; + } + + viommu = ops->viommu_alloc(idev->dev, hwpt_paging->common.domain, + ucmd->ictx, cmd->type); + if (IS_ERR(viommu)) { + rc = PTR_ERR(viommu); + goto out_put_hwpt; + } + + viommu->type = cmd->type; + viommu->ictx = ucmd->ictx; + viommu->hwpt = hwpt_paging; + refcount_inc(&viommu->hwpt->common.obj.users); + /* + * It is the most likely case that a physical IOMMU is unpluggable. A + * pluggable IOMMU instance (if exists) is responsible for refcounting + * on its own. + */ + viommu->iommu_dev = __iommu_get_iommu_dev(idev->dev); + + cmd->out_viommu_id = viommu->obj.id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_abort; + iommufd_object_finalize(ucmd->ictx, &viommu->obj); + goto out_put_hwpt; + +out_abort: + iommufd_object_abort_and_destroy(ucmd->ictx, &viommu->obj); +out_put_hwpt: + iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj); +out_put_idev: + iommufd_put_object(ucmd->ictx, &idev->obj); + return rc; +} diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 41b1a01e9293..302844136b02 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -52,6 +52,7 @@ enum { IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, + IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, }; /** @@ -822,4 +823,43 @@ struct iommu_fault_alloc { __u32 out_fault_fd; }; #define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC) + +/** + * enum iommu_viommu_type - Virtual IOMMU Type + * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use + */ +enum iommu_viommu_type { + IOMMU_VIOMMU_TYPE_DEFAULT = 0, +}; + +/** + * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC) + * @size: sizeof(struct iommu_viommu_alloc) + * @flags: Must be 0 + * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type + * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU + * @hwpt_id: ID of a nesting parent HWPT to associate to + * @out_viommu_id: Output virtual IOMMU ID for the allocated object + * + * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's + * virtualization support that is a security-isolated slice of the real IOMMU HW + * that is unique to a specific VM. Operations global to the IOMMU are connected + * to the vIOMMU, such as: + * - Security namespace for guest owned ID, e.g. guest-controlled cache tags + * - Non-device-affiliated event reporting, e.g. invalidation queue errors + * - Access to a sharable nesting parent pagetable across physical IOMMUs + * - Virtualization of various platforms IDs, e.g. RIDs and others + * - Delivery of paravirtualized invalidation + * - Direct assigned invalidation queues + * - Direct assigned interrupts + */ +struct iommu_viommu_alloc { + __u32 size; + __u32 flags; + __u32 type; + __u32 dev_id; + __u32 hwpt_id; + __u32 out_viommu_id; +}; +#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) #endif -- cgit v1.2.3 From 69d2689e57f5cb235c0609dd2f8fa10c1a832f87 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:22 -0800 Subject: iommufd: Add alloc_domain_nested op to iommufd_viommu_ops Allow IOMMU driver to use a vIOMMU object that holds a nesting parent hwpt/domain to allocate a nested domain. Link: https://patch.msgid.link/r/2dcdb5e405dc0deb68230564530d989d285d959c.1730836219.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 4fc2ce332f10..de9b56265c9c 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -14,6 +14,7 @@ struct device; struct file; struct iommu_group; +struct iommu_user_data; struct iommufd_access; struct iommufd_ctx; struct iommufd_device; @@ -95,9 +96,17 @@ struct iommufd_viommu { * struct iommufd_viommu_ops - vIOMMU specific operations * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory * of the vIOMMU will be free-ed by iommufd core after calling this op + * @alloc_domain_nested: Allocate a IOMMU_DOMAIN_NESTED on a vIOMMU that holds a + * nesting parent domain (IOMMU_DOMAIN_PAGING). @user_data + * must be defined in include/uapi/linux/iommufd.h. + * It must fully initialize the new iommu_domain before + * returning. Upon failure, ERR_PTR must be returned. */ struct iommufd_viommu_ops { void (*destroy)(struct iommufd_viommu *viommu); + struct iommu_domain *(*alloc_domain_nested)( + struct iommufd_viommu *viommu, u32 flags, + const struct iommu_user_data *user_data); }; #if IS_ENABLED(CONFIG_IOMMUFD) -- cgit v1.2.3 From 13a750180fc86d41695c8f64d8892412482a401d Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:23 -0800 Subject: iommufd: Allow pt_id to carry viommu_id for IOMMU_HWPT_ALLOC Now a vIOMMU holds a shareable nesting parent HWPT. So, it can act like that nesting parent HWPT to allocate a nested HWPT. Support that in the IOMMU_HWPT_ALLOC ioctl handler, and update its kdoc. Also, add an iommufd_viommu_alloc_hwpt_nested helper to allocate a nested HWPT for a vIOMMU object. Since a vIOMMU object holds the parent hwpt's refcount already, increase the refcount of the vIOMMU only. Link: https://patch.msgid.link/r/a0f24f32bfada8b448d17587adcaedeeb50a67ed.1730836219.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/hw_pagetable.c | 73 ++++++++++++++++++++++++++++++++- drivers/iommu/iommufd/iommufd_private.h | 1 + include/uapi/linux/iommufd.h | 14 ++++--- 3 files changed, 81 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index d06bf6e6c19f..982bf4a35a2b 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -57,7 +57,10 @@ void iommufd_hwpt_nested_destroy(struct iommufd_object *obj) container_of(obj, struct iommufd_hwpt_nested, common.obj); __iommufd_hwpt_destroy(&hwpt_nested->common); - refcount_dec(&hwpt_nested->parent->common.obj.users); + if (hwpt_nested->viommu) + refcount_dec(&hwpt_nested->viommu->obj.users); + else + refcount_dec(&hwpt_nested->parent->common.obj.users); } void iommufd_hwpt_nested_abort(struct iommufd_object *obj) @@ -260,6 +263,58 @@ out_abort: return ERR_PTR(rc); } +/** + * iommufd_viommu_alloc_hwpt_nested() - Get a hwpt_nested for a vIOMMU + * @viommu: vIOMMU ojbect to associate the hwpt_nested/domain with + * @flags: Flags from userspace + * @user_data: user_data pointer. Must be valid + * + * Allocate a new IOMMU_DOMAIN_NESTED for a vIOMMU and return it as a NESTED + * hw_pagetable. + */ +static struct iommufd_hwpt_nested * +iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, + const struct iommu_user_data *user_data) +{ + struct iommufd_hwpt_nested *hwpt_nested; + struct iommufd_hw_pagetable *hwpt; + int rc; + + if (!user_data->len) + return ERR_PTR(-EOPNOTSUPP); + if (!viommu->ops || !viommu->ops->alloc_domain_nested) + return ERR_PTR(-EOPNOTSUPP); + + hwpt_nested = __iommufd_object_alloc( + viommu->ictx, hwpt_nested, IOMMUFD_OBJ_HWPT_NESTED, common.obj); + if (IS_ERR(hwpt_nested)) + return ERR_CAST(hwpt_nested); + hwpt = &hwpt_nested->common; + + hwpt_nested->viommu = viommu; + refcount_inc(&viommu->obj.users); + hwpt_nested->parent = viommu->hwpt; + + hwpt->domain = + viommu->ops->alloc_domain_nested(viommu, flags, user_data); + if (IS_ERR(hwpt->domain)) { + rc = PTR_ERR(hwpt->domain); + hwpt->domain = NULL; + goto out_abort; + } + hwpt->domain->owner = viommu->iommu_dev->ops; + + if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { + rc = -EINVAL; + goto out_abort; + } + return hwpt_nested; + +out_abort: + iommufd_object_abort_and_destroy(viommu->ictx, &hwpt->obj); + return ERR_PTR(rc); +} + int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) { struct iommu_hwpt_alloc *cmd = ucmd->cmd; @@ -316,6 +371,22 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) goto out_unlock; } hwpt = &hwpt_nested->common; + } else if (pt_obj->type == IOMMUFD_OBJ_VIOMMU) { + struct iommufd_hwpt_nested *hwpt_nested; + struct iommufd_viommu *viommu; + + viommu = container_of(pt_obj, struct iommufd_viommu, obj); + if (viommu->iommu_dev != __iommu_get_iommu_dev(idev->dev)) { + rc = -EINVAL; + goto out_unlock; + } + hwpt_nested = iommufd_viommu_alloc_hwpt_nested( + viommu, cmd->flags, &user_data); + if (IS_ERR(hwpt_nested)) { + rc = PTR_ERR(hwpt_nested); + goto out_unlock; + } + hwpt = &hwpt_nested->common; } else { rc = -EINVAL; goto out_put_pt; diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index a8104d9d4cef..e8f5ef550cc9 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -290,6 +290,7 @@ struct iommufd_hwpt_paging { struct iommufd_hwpt_nested { struct iommufd_hw_pagetable common; struct iommufd_hwpt_paging *parent; + struct iommufd_viommu *viommu; }; static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt) diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 302844136b02..a498d4838f9a 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -435,7 +435,7 @@ enum iommu_hwpt_data_type { * @size: sizeof(struct iommu_hwpt_alloc) * @flags: Combination of enum iommufd_hwpt_alloc_flags * @dev_id: The device to allocate this HWPT for - * @pt_id: The IOAS or HWPT to connect this HWPT to + * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to * @out_hwpt_id: The ID of the new HWPT * @__reserved: Must be 0 * @data_type: One of enum iommu_hwpt_data_type @@ -454,11 +454,13 @@ enum iommu_hwpt_data_type { * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags. * - * A user-managed nested HWPT will be created from a given parent HWPT via - * @pt_id, in which the parent HWPT must be allocated previously via the - * same ioctl from a given IOAS (@pt_id). In this case, the @data_type - * must be set to a pre-defined type corresponding to an I/O page table - * type supported by the underlying IOMMU hardware. + * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a + * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be + * allocated previously via the same ioctl from a given IOAS (@pt_id). In this + * case, the @data_type must be set to a pre-defined type corresponding to an + * I/O page table type supported by the underlying IOMMU hardware. The device + * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU + * instance. * * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr -- cgit v1.2.3 From 0ce5c2477af2e2284b9c70474e4dae85db211680 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:09 -0800 Subject: iommufd/viommu: Add IOMMUFD_OBJ_VDEVICE and IOMMU_VDEVICE_ALLOC ioctl Introduce a new IOMMUFD_OBJ_VDEVICE to represent a physical device (struct device) against a vIOMMU (struct iommufd_viommu) object in a VM. This vDEVICE object (and its structure) holds all the infos and attributes in the VM, regarding the device related to the vIOMMU. As an initial patch, add a per-vIOMMU virtual ID. This can be: - Virtual StreamID on a nested ARM SMMUv3, an index to a Stream Table - Virtual DeviceID on a nested AMD IOMMU, an index to a Device Table - Virtual RID on a nested Intel VT-D IOMMU, an index to a Context Table Potentially, this vDEVICE structure would hold some vData for Confidential Compute Architecture (CCA). Use this virtual ID to index an "vdevs" xarray that belongs to a vIOMMU object. Add a new ioctl for vDEVICE allocations. Since a vDEVICE is a connection of a device object and an iommufd_viommu object, take two refcounts in the ioctl handler. Link: https://patch.msgid.link/r/cda8fd2263166e61b8191a3b3207e0d2b08545bf.1730836308.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_private.h | 18 ++++++++ drivers/iommu/iommufd/main.c | 6 +++ drivers/iommu/iommufd/viommu.c | 76 +++++++++++++++++++++++++++++++++ include/linux/iommufd.h | 4 ++ include/uapi/linux/iommufd.h | 22 ++++++++++ 5 files changed, 126 insertions(+) (limited to 'include') diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index e8f5ef550cc9..062656c19a07 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -507,8 +507,26 @@ static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev, return iommu_group_replace_domain(idev->igroup->group, hwpt->domain); } +static inline struct iommufd_viommu * +iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_VIOMMU), + struct iommufd_viommu, obj); +} + int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_viommu_destroy(struct iommufd_object *obj); +int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd); +void iommufd_vdevice_destroy(struct iommufd_object *obj); + +struct iommufd_vdevice { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct iommufd_viommu *viommu; + struct device *dev; + u64 id; /* per-vIOMMU virtual ID */ +}; #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index cc514f9bc3e6..d735fe04197f 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -308,6 +308,7 @@ union ucmd_buffer { struct iommu_option option; struct iommu_vfio_ioas vfio_ioas; struct iommu_viommu_alloc viommu; + struct iommu_vdevice_alloc vdev; #ifdef CONFIG_IOMMUFD_TEST struct iommu_test_cmd test; #endif @@ -363,6 +364,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { __reserved), IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl, struct iommu_viommu_alloc, out_viommu_id), + IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl, + struct iommu_vdevice_alloc, virt_id), #ifdef CONFIG_IOMMUFD_TEST IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last), #endif @@ -501,6 +504,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_VIOMMU] = { .destroy = iommufd_viommu_destroy, }, + [IOMMUFD_OBJ_VDEVICE] = { + .destroy = iommufd_vdevice_destroy, + }, #ifdef CONFIG_IOMMUFD_TEST [IOMMUFD_OBJ_SELFTEST] = { .destroy = iommufd_selftest_destroy, diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 888239b78667..69b88e8c7c26 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -11,6 +11,7 @@ void iommufd_viommu_destroy(struct iommufd_object *obj) if (viommu->ops && viommu->ops->destroy) viommu->ops->destroy(viommu); refcount_dec(&viommu->hwpt->common.obj.users); + xa_destroy(&viommu->vdevs); } int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) @@ -53,6 +54,7 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_put_hwpt; } + xa_init(&viommu->vdevs); viommu->type = cmd->type; viommu->ictx = ucmd->ictx; viommu->hwpt = hwpt_paging; @@ -79,3 +81,77 @@ out_put_idev: iommufd_put_object(ucmd->ictx, &idev->obj); return rc; } + +void iommufd_vdevice_destroy(struct iommufd_object *obj) +{ + struct iommufd_vdevice *vdev = + container_of(obj, struct iommufd_vdevice, obj); + struct iommufd_viommu *viommu = vdev->viommu; + + /* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */ + xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL); + refcount_dec(&viommu->obj.users); + put_device(vdev->dev); +} + +int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) +{ + struct iommu_vdevice_alloc *cmd = ucmd->cmd; + struct iommufd_vdevice *vdev, *curr; + struct iommufd_viommu *viommu; + struct iommufd_device *idev; + u64 virt_id = cmd->virt_id; + int rc = 0; + + /* virt_id indexes an xarray */ + if (virt_id > ULONG_MAX) + return -EINVAL; + + viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); + if (IS_ERR(viommu)) + return PTR_ERR(viommu); + + idev = iommufd_get_device(ucmd, cmd->dev_id); + if (IS_ERR(idev)) { + rc = PTR_ERR(idev); + goto out_put_viommu; + } + + if (viommu->iommu_dev != __iommu_get_iommu_dev(idev->dev)) { + rc = -EINVAL; + goto out_put_idev; + } + + vdev = iommufd_object_alloc(ucmd->ictx, vdev, IOMMUFD_OBJ_VDEVICE); + if (IS_ERR(vdev)) { + rc = PTR_ERR(vdev); + goto out_put_idev; + } + + vdev->id = virt_id; + vdev->dev = idev->dev; + get_device(idev->dev); + vdev->viommu = viommu; + refcount_inc(&viommu->obj.users); + + curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL); + if (curr) { + rc = xa_err(curr) ?: -EEXIST; + goto out_abort; + } + + cmd->out_vdevice_id = vdev->obj.id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_abort; + iommufd_object_finalize(ucmd->ictx, &vdev->obj); + goto out_put_idev; + +out_abort: + iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj); +out_put_idev: + iommufd_put_object(ucmd->ictx, &idev->obj); +out_put_viommu: + iommufd_put_object(ucmd->ictx, &viommu->obj); + return rc; +} diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index de9b56265c9c..71fa1e343023 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -10,6 +10,7 @@ #include #include #include +#include struct device; struct file; @@ -31,6 +32,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_ACCESS, IOMMUFD_OBJ_FAULT, IOMMUFD_OBJ_VIOMMU, + IOMMUFD_OBJ_VDEVICE, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -89,6 +91,8 @@ struct iommufd_viommu { const struct iommufd_viommu_ops *ops; + struct xarray vdevs; + unsigned int type; }; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index a498d4838f9a..9b5236004b8e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -53,6 +53,7 @@ enum { IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, + IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, }; /** @@ -864,4 +865,25 @@ struct iommu_viommu_alloc { __u32 out_viommu_id; }; #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) + +/** + * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC) + * @size: sizeof(struct iommu_vdevice_alloc) + * @viommu_id: vIOMMU ID to associate with the virtual device + * @dev_id: The physical device to allocate a virtual instance on the vIOMMU + * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTORY + * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID + * of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table + * + * Allocate a virtual device instance (for a physical device) against a vIOMMU. + * This instance holds the device's information (related to its vIOMMU) in a VM. + */ +struct iommu_vdevice_alloc { + __u32 size; + __u32 viommu_id; + __u32 dev_id; + __u32 out_vdevice_id; + __aligned_u64 virt_id; +}; +#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC) #endif -- cgit v1.2.3 From 67db79dc1a411335f8a7e53a5e206d96b237d9a8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:11 -0800 Subject: iommu/viommu: Add cache_invalidate to iommufd_viommu_ops This per-vIOMMU cache_invalidate op is like the cache_invalidate_user op in struct iommu_domain_ops, but wider, supporting device cache (e.g. PCI ATC invaldiations). Link: https://patch.msgid.link/r/90138505850fa6b165135e78a87b4cc7022869a4.1730836308.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 71fa1e343023..2bc735ff9511 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -16,6 +16,7 @@ struct device; struct file; struct iommu_group; struct iommu_user_data; +struct iommu_user_data_array; struct iommufd_access; struct iommufd_ctx; struct iommufd_device; @@ -105,12 +106,21 @@ struct iommufd_viommu { * must be defined in include/uapi/linux/iommufd.h. * It must fully initialize the new iommu_domain before * returning. Upon failure, ERR_PTR must be returned. + * @cache_invalidate: Flush hardware cache used by a vIOMMU. It can be used for + * any IOMMU hardware specific cache: TLB and device cache. + * The @array passes in the cache invalidation requests, in + * form of a driver data structure. A driver must update the + * array->entry_num to report the number of handled requests. + * The data structure of the array entry must be defined in + * include/uapi/linux/iommufd.h */ struct iommufd_viommu_ops { void (*destroy)(struct iommufd_viommu *viommu); struct iommu_domain *(*alloc_domain_nested)( struct iommufd_viommu *viommu, u32 flags, const struct iommu_user_data *user_data); + int (*cache_invalidate)(struct iommufd_viommu *viommu, + struct iommu_user_data_array *array); }; #if IS_ENABLED(CONFIG_IOMMUFD) -- cgit v1.2.3 From 54ce69e36c71c88f258b1a322c54343d90954858 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:12 -0800 Subject: iommufd: Allow hwpt_id to carry viommu_id for IOMMU_HWPT_INVALIDATE With a vIOMMU object, use space can flush any IOMMU related cache that can be directed via a vIOMMU object. It is similar to the IOMMU_HWPT_INVALIDATE uAPI, but can cover a wider range than IOTLB, e.g. device/desciprtor cache. Allow hwpt_id of the iommu_hwpt_invalidate structure to carry a viommu_id, and reuse the IOMMU_HWPT_INVALIDATE uAPI for vIOMMU invalidations. Drivers can define different structures for vIOMMU invalidations v.s. HWPT ones. Since both the HWPT-based and vIOMMU-based invalidation pathways check own cache invalidation op, remove the WARN_ON_ONCE in the allocator. Update the uAPI, kdoc, and selftest case accordingly. Link: https://patch.msgid.link/r/b411e2245e303b8a964f39f49453a5dff280968f.1730836308.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/hw_pagetable.c | 40 +++++++++++++++++++++++++-------- include/uapi/linux/iommufd.h | 9 +++++--- tools/testing/selftests/iommu/iommufd.c | 4 ++-- 3 files changed, 39 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 982bf4a35a2b..702057655a81 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -251,8 +251,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, } hwpt->domain->owner = ops; - if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED || - !hwpt->domain->ops->cache_invalidate_user)) { + if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { rc = -EINVAL; goto out_abort; } @@ -483,7 +482,7 @@ int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) .entry_len = cmd->entry_len, .entry_num = cmd->entry_num, }; - struct iommufd_hw_pagetable *hwpt; + struct iommufd_object *pt_obj; u32 done_num = 0; int rc; @@ -497,17 +496,40 @@ int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) goto out; } - hwpt = iommufd_get_hwpt_nested(ucmd, cmd->hwpt_id); - if (IS_ERR(hwpt)) { - rc = PTR_ERR(hwpt); + pt_obj = iommufd_get_object(ucmd->ictx, cmd->hwpt_id, IOMMUFD_OBJ_ANY); + if (IS_ERR(pt_obj)) { + rc = PTR_ERR(pt_obj); goto out; } + if (pt_obj->type == IOMMUFD_OBJ_HWPT_NESTED) { + struct iommufd_hw_pagetable *hwpt = + container_of(pt_obj, struct iommufd_hw_pagetable, obj); + + if (!hwpt->domain->ops || + !hwpt->domain->ops->cache_invalidate_user) { + rc = -EOPNOTSUPP; + goto out_put_pt; + } + rc = hwpt->domain->ops->cache_invalidate_user(hwpt->domain, + &data_array); + } else if (pt_obj->type == IOMMUFD_OBJ_VIOMMU) { + struct iommufd_viommu *viommu = + container_of(pt_obj, struct iommufd_viommu, obj); + + if (!viommu->ops || !viommu->ops->cache_invalidate) { + rc = -EOPNOTSUPP; + goto out_put_pt; + } + rc = viommu->ops->cache_invalidate(viommu, &data_array); + } else { + rc = -EINVAL; + goto out_put_pt; + } - rc = hwpt->domain->ops->cache_invalidate_user(hwpt->domain, - &data_array); done_num = data_array.entry_num; - iommufd_put_object(ucmd->ictx, &hwpt->obj); +out_put_pt: + iommufd_put_object(ucmd->ictx, pt_obj); out: cmd->entry_num = done_num; if (iommufd_ucmd_respond(ucmd, sizeof(*cmd))) diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 9b5236004b8e..badb41c5bfa4 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -700,7 +700,7 @@ struct iommu_hwpt_vtd_s1_invalidate { /** * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) * @size: sizeof(struct iommu_hwpt_invalidate) - * @hwpt_id: ID of a nested HWPT for cache invalidation + * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation * @data_uptr: User pointer to an array of driver-specific cache invalidation * data. * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data @@ -711,8 +711,11 @@ struct iommu_hwpt_vtd_s1_invalidate { * Output the number of requests successfully handled by kernel. * @__reserved: Must be 0. * - * Invalidate the iommu cache for user-managed page table. Modifications on a - * user-managed page table should be followed by this operation to sync cache. + * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications + * on a user-managed page table should be followed by this operation, if a HWPT + * is passed in via @hwpt_id. Other caches, such as device cache or descriptor + * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field. + * * Each ioctl can support one or more cache invalidation requests in the array * that has a total size of @entry_len * @entry_num. * diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index f3cb628753c9..8cb3e835ca97 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -367,9 +367,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, parent_hwpt_id)); - /* hwpt_invalidate only supports a user-managed hwpt (nested) */ + /* hwpt_invalidate does not support a parent hwpt */ num_inv = 1; - test_err_hwpt_invalidate(ENOENT, parent_hwpt_id, inv_reqs, + test_err_hwpt_invalidate(EINVAL, parent_hwpt_id, inv_reqs, IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, sizeof(*inv_reqs), &num_inv); assert(!num_inv); -- cgit v1.2.3 From 4f2e59ccb69866c5165525bd7aee47cd5cd00acc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 5 Nov 2024 12:05:13 -0800 Subject: iommu: Add iommu_copy_struct_from_full_user_array helper The iommu_copy_struct_from_user_array helper can be used to copy a single entry from a user array which might not be efficient if the array is big. Add a new iommu_copy_struct_from_full_user_array to copy the entire user array at once. Update the existing iommu_copy_struct_from_user_array kdoc accordingly. Link: https://patch.msgid.link/r/5cd773d9c26920c5807d232b21d415ea79172e49.1730836308.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2574fc8abaf2..11de66237eaa 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -493,7 +493,9 @@ static inline int __iommu_copy_struct_from_user_array( * @index: Index to the location in the array to copy user data from * @min_last: The last member of the data structure @kdst points in the * initial version. - * Return 0 for success, otherwise -error. + * + * Copy a single entry from a user array. Return 0 for success, otherwise + * -error. */ #define iommu_copy_struct_from_user_array(kdst, user_array, data_type, index, \ min_last) \ @@ -501,6 +503,50 @@ static inline int __iommu_copy_struct_from_user_array( kdst, user_array, data_type, index, sizeof(*(kdst)), \ offsetofend(typeof(*(kdst)), min_last)) +/** + * iommu_copy_struct_from_full_user_array - Copy iommu driver specific user + * space data from an iommu_user_data_array + * @kdst: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @kdst_entry_size: sizeof(*kdst) + * @user_array: Pointer to a struct iommu_user_data_array for a user space + * array + * @data_type: The data type of the @kdst. Must match with @user_array->type + * + * Copy the entire user array. kdst must have room for kdst_entry_size * + * user_array->entry_num bytes. Return 0 for success, otherwise -error. + */ +static inline int +iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, + struct iommu_user_data_array *user_array, + unsigned int data_type) +{ + unsigned int i; + int ret; + + if (user_array->type != data_type) + return -EINVAL; + if (!user_array->entry_num) + return -EINVAL; + if (likely(user_array->entry_len == kdst_entry_size)) { + if (copy_from_user(kdst, user_array->uptr, + user_array->entry_num * + user_array->entry_len)) + return -EFAULT; + } + + /* Copy item by item */ + for (i = 0; i != user_array->entry_num; i++) { + ret = copy_struct_from_user( + kdst + kdst_entry_size * i, kdst_entry_size, + user_array->uptr + user_array->entry_len * i, + user_array->entry_len); + if (ret) + return ret; + } + return 0; +} + /** * struct iommu_ops - iommu ops and capabilities * @capable: check capability -- cgit v1.2.3 From c747e67978ff473e6830afe0b1f3986dd1f444b5 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:14 -0800 Subject: iommufd/viommu: Add iommufd_viommu_find_dev helper This avoids a bigger trouble of exposing struct iommufd_device and struct iommufd_vdevice in the public header. Link: https://patch.msgid.link/r/84fa7c624db4d4508067ccfdf42059533950180a.1730836308.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/driver.c | 13 +++++++++++++ include/linux/iommufd.h | 8 ++++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index 2bc47d92a0ab..7b67fdf44134 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -36,5 +36,18 @@ out_free: } EXPORT_SYMBOL_NS_GPL(_iommufd_object_alloc, IOMMUFD); +/* Caller should xa_lock(&viommu->vdevs) to protect the return value */ +struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, + unsigned long vdev_id) +{ + struct iommufd_vdevice *vdev; + + lockdep_assert_held(&viommu->vdevs.xa_lock); + + vdev = xa_load(&viommu->vdevs, vdev_id); + return vdev ? vdev->dev : NULL; +} +EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, IOMMUFD); + MODULE_DESCRIPTION("iommufd code shared with builtin modules"); MODULE_LICENSE("GPL"); diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 2bc735ff9511..11110c749200 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -185,6 +185,8 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, enum iommufd_object_type type); +struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, + unsigned long vdev_id); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ static inline struct iommufd_object * _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, @@ -192,6 +194,12 @@ _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, { return ERR_PTR(-EOPNOTSUPP); } + +static inline struct device * +iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) +{ + return NULL; +} #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ /* -- cgit v1.2.3 From 69d9b312f38aa19f8c801e90bd23d70685be49f0 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:52 -0300 Subject: iommu/arm-smmu-v3: Support IOMMU_VIOMMU_ALLOC Add a new driver-type for ARM SMMUv3 to enum iommu_viommu_type. Implement an arm_vsmmu_alloc(). As an initial step, copy the VMID from s2_parent. A followup series is required to give the VIOMMU object it's own VMID that will be used in all nesting configurations. Link: https://patch.msgid.link/r/8-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 45 ++++++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 + drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 13 +++++++ include/uapi/linux/iommufd.h | 4 ++ 4 files changed, 63 insertions(+) (limited to 'include') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 3d2671031c9b..60dd9e907595 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -29,3 +29,48 @@ void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type) return info; } + +static const struct iommufd_viommu_ops arm_vsmmu_ops = { +}; + +struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, + struct iommu_domain *parent, + struct iommufd_ctx *ictx, + unsigned int viommu_type) +{ + struct arm_smmu_device *smmu = + iommu_get_iommu_dev(dev, struct arm_smmu_device, iommu); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_domain *s2_parent = to_smmu_domain(parent); + struct arm_vsmmu *vsmmu; + + if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3) + return ERR_PTR(-EOPNOTSUPP); + + if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) + return ERR_PTR(-EOPNOTSUPP); + + if (s2_parent->smmu != master->smmu) + return ERR_PTR(-EINVAL); + + /* + * Must support some way to prevent the VM from bypassing the cache + * because VFIO currently does not do any cache maintenance. canwbs + * indicates the device is fully coherent and no cache maintenance is + * ever required, even for PCI No-Snoop. + */ + if (!arm_smmu_master_canwbs(master)) + return ERR_PTR(-EOPNOTSUPP); + + vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core, + &arm_vsmmu_ops); + if (IS_ERR(vsmmu)) + return ERR_CAST(vsmmu); + + vsmmu->smmu = smmu; + vsmmu->s2_parent = s2_parent; + /* FIXME Move VMID allocation from the S2 domain allocation to here */ + vsmmu->vmid = s2_parent->s2_cfg.vmid; + + return &vsmmu->core; +} diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b4b03206afbf..c425fb923eb3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3517,6 +3517,7 @@ static struct iommu_ops arm_smmu_ops = { .dev_disable_feat = arm_smmu_dev_disable_feature, .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, + .viommu_alloc = arm_vsmmu_alloc, .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index c9e5290e995a..3b8013afcec0 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -976,10 +977,22 @@ tegra241_cmdqv_probe(struct arm_smmu_device *smmu) } #endif /* CONFIG_TEGRA241_CMDQV */ +struct arm_vsmmu { + struct iommufd_viommu core; + struct arm_smmu_device *smmu; + struct arm_smmu_domain *s2_parent; + u16 vmid; +}; + #if IS_ENABLED(CONFIG_ARM_SMMU_V3_IOMMUFD) void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type); +struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, + struct iommu_domain *parent, + struct iommufd_ctx *ictx, + unsigned int viommu_type); #else #define arm_smmu_hw_info NULL +#define arm_vsmmu_alloc NULL #endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */ #endif /* _ARM_SMMU_V3_H */ diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index f4f76759b738..7cb13a29969d 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -425,10 +425,12 @@ struct iommu_hwpt_vtd_s1 { * enum iommu_hwpt_data_type - IOMMU HWPT Data Type * @IOMMU_HWPT_DATA_NONE: no data * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table + * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table */ enum iommu_hwpt_data_type { IOMMU_HWPT_DATA_NONE = 0, IOMMU_HWPT_DATA_VTD_S1 = 1, + IOMMU_HWPT_DATA_ARM_SMMUV3 = 2, }; /** @@ -868,9 +870,11 @@ struct iommu_fault_alloc { /** * enum iommu_viommu_type - Virtual IOMMU Type * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use + * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type */ enum iommu_viommu_type { IOMMU_VIOMMU_TYPE_DEFAULT = 0, + IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1, }; /** -- cgit v1.2.3 From 1e8be08d1c91d52a9b51d424db78ddbf88660bbb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:53 -0300 Subject: iommu/arm-smmu-v3: Support IOMMU_DOMAIN_NESTED For SMMUv3 a IOMMU_DOMAIN_NESTED is composed of a S2 iommu_domain acting as the parent and a user provided STE fragment that defines the CD table and related data with addresses translated by the S2 iommu_domain. The kernel only permits userspace to control certain allowed bits of the STE that are safe for user/guest control. IOTLB maintenance is a bit subtle here, the S1 implicitly includes the S2 translation, but there is no way of knowing which S1 entries refer to a range of S2. For the IOTLB we follow ARM's guidance and issue a CMDQ_OP_TLBI_NH_ALL to flush all ASIDs from the VMID after flushing the S2 on any change to the S2. The IOMMU_DOMAIN_NESTED can only be created from inside a VIOMMU as the invalidation path relies on the VIOMMU to translate virtual stream ID used in the invalidation commands for the CD table and ATS. Link: https://patch.msgid.link/r/9-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 163 +++++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 17 ++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 26 ++++ include/uapi/linux/iommufd.h | 20 +++ 4 files changed, 225 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 60dd9e907595..91247a2a2d2c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -30,7 +30,170 @@ void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type) return info; } +static void arm_smmu_make_nested_cd_table_ste( + struct arm_smmu_ste *target, struct arm_smmu_master *master, + struct arm_smmu_nested_domain *nested_domain, bool ats_enabled) +{ + arm_smmu_make_s2_domain_ste( + target, master, nested_domain->vsmmu->s2_parent, ats_enabled); + + target->data[0] = cpu_to_le64(STRTAB_STE_0_V | + FIELD_PREP(STRTAB_STE_0_CFG, + STRTAB_STE_0_CFG_NESTED)); + target->data[0] |= nested_domain->ste[0] & + ~cpu_to_le64(STRTAB_STE_0_CFG); + target->data[1] |= nested_domain->ste[1]; +} + +/* + * Create a physical STE from the virtual STE that userspace provided when it + * created the nested domain. Using the vSTE userspace can request: + * - Non-valid STE + * - Abort STE + * - Bypass STE (install the S2, no CD table) + * - CD table STE (install the S2 and the userspace CD table) + */ +static void arm_smmu_make_nested_domain_ste( + struct arm_smmu_ste *target, struct arm_smmu_master *master, + struct arm_smmu_nested_domain *nested_domain, bool ats_enabled) +{ + unsigned int cfg = + FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0])); + + /* + * Userspace can request a non-valid STE through the nesting interface. + * We relay that into an abort physical STE with the intention that + * C_BAD_STE for this SID can be generated to userspace. + */ + if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) + cfg = STRTAB_STE_0_CFG_ABORT; + + switch (cfg) { + case STRTAB_STE_0_CFG_S1_TRANS: + arm_smmu_make_nested_cd_table_ste(target, master, nested_domain, + ats_enabled); + break; + case STRTAB_STE_0_CFG_BYPASS: + arm_smmu_make_s2_domain_ste(target, master, + nested_domain->vsmmu->s2_parent, + ats_enabled); + break; + case STRTAB_STE_0_CFG_ABORT: + default: + arm_smmu_make_abort_ste(target); + break; + } +} + +static int arm_smmu_attach_dev_nested(struct iommu_domain *domain, + struct device *dev) +{ + struct arm_smmu_nested_domain *nested_domain = + to_smmu_nested_domain(domain); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_attach_state state = { + .master = master, + .old_domain = iommu_get_domain_for_dev(dev), + .ssid = IOMMU_NO_PASID, + /* Currently invalidation of ATC is not supported */ + .disable_ats = true, + }; + struct arm_smmu_ste ste; + int ret; + + if (nested_domain->vsmmu->smmu != master->smmu) + return -EINVAL; + if (arm_smmu_ssids_in_use(&master->cd_table)) + return -EBUSY; + + mutex_lock(&arm_smmu_asid_lock); + ret = arm_smmu_attach_prepare(&state, domain); + if (ret) { + mutex_unlock(&arm_smmu_asid_lock); + return ret; + } + + arm_smmu_make_nested_domain_ste(&ste, master, nested_domain, + state.ats_enabled); + arm_smmu_install_ste_for_dev(master, &ste); + arm_smmu_attach_commit(&state); + mutex_unlock(&arm_smmu_asid_lock); + return 0; +} + +static void arm_smmu_domain_nested_free(struct iommu_domain *domain) +{ + kfree(to_smmu_nested_domain(domain)); +} + +static const struct iommu_domain_ops arm_smmu_nested_ops = { + .attach_dev = arm_smmu_attach_dev_nested, + .free = arm_smmu_domain_nested_free, +}; + +static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg) +{ + unsigned int cfg; + + if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) { + memset(arg->ste, 0, sizeof(arg->ste)); + return 0; + } + + /* EIO is reserved for invalid STE data. */ + if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) || + (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED)) + return -EIO; + + cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0])); + if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS && + cfg != STRTAB_STE_0_CFG_S1_TRANS) + return -EIO; + return 0; +} + +static struct iommu_domain * +arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, + const struct iommu_user_data *user_data) +{ + struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core); + const u32 SUPPORTED_FLAGS = IOMMU_HWPT_FAULT_ID_VALID; + struct arm_smmu_nested_domain *nested_domain; + struct iommu_hwpt_arm_smmuv3 arg; + int ret; + + /* + * Faults delivered to the nested domain are faults that originated by + * the S1 in the domain. The core code will match all PASIDs when + * delivering the fault due to user_pasid_table + */ + if (flags & ~SUPPORTED_FLAGS) + return ERR_PTR(-EOPNOTSUPP); + + ret = iommu_copy_struct_from_user(&arg, user_data, + IOMMU_HWPT_DATA_ARM_SMMUV3, ste); + if (ret) + return ERR_PTR(ret); + + ret = arm_smmu_validate_vste(&arg); + if (ret) + return ERR_PTR(ret); + + nested_domain = kzalloc(sizeof(*nested_domain), GFP_KERNEL_ACCOUNT); + if (!nested_domain) + return ERR_PTR(-ENOMEM); + + nested_domain->domain.type = IOMMU_DOMAIN_NESTED; + nested_domain->domain.ops = &arm_smmu_nested_ops; + nested_domain->vsmmu = vsmmu; + nested_domain->ste[0] = arg.ste[0]; + nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS); + + return &nested_domain->domain; +} + static const struct iommufd_viommu_ops arm_vsmmu_ops = { + .alloc_domain_nested = arm_vsmmu_alloc_domain_nested, }; struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index c425fb923eb3..53f12b9d78ab 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -295,6 +295,7 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) case CMDQ_OP_TLBI_NH_ASID: cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); fallthrough; + case CMDQ_OP_TLBI_NH_ALL: case CMDQ_OP_TLBI_S12_VMALL: cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); break; @@ -2230,6 +2231,15 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, } __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); + if (smmu_domain->nest_parent) { + /* + * When the S2 domain changes all the nested S1 ASIDs have to be + * flushed too. + */ + cmd.opcode = CMDQ_OP_TLBI_NH_ALL; + arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd); + } + /* * Unfortunately, this can't be leaf-only since we may have * zapped an entire table. @@ -2644,6 +2654,8 @@ to_smmu_domain_devices(struct iommu_domain *domain) if ((domain->type & __IOMMU_DOMAIN_PAGING) || domain->type == IOMMU_DOMAIN_SVA) return to_smmu_domain(domain); + if (domain->type == IOMMU_DOMAIN_NESTED) + return to_smmu_nested_domain(domain)->vsmmu->s2_parent; return NULL; } @@ -2716,7 +2728,8 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, * enabled if we have arm_smmu_domain, those always have page * tables. */ - state->ats_enabled = arm_smmu_ats_supported(master); + state->ats_enabled = !state->disable_ats && + arm_smmu_ats_supported(master); } if (smmu_domain) { @@ -3122,6 +3135,7 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags, goto err_free; } smmu_domain->stage = ARM_SMMU_DOMAIN_S2; + smmu_domain->nest_parent = true; } smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; @@ -3518,6 +3532,7 @@ static struct iommu_ops arm_smmu_ops = { .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, .viommu_alloc = arm_vsmmu_alloc, + .user_pasid_table = 1, .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 3b8013afcec0..3fabe187ea78 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -244,6 +244,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_0_CFG_BYPASS 4 #define STRTAB_STE_0_CFG_S1_TRANS 5 #define STRTAB_STE_0_CFG_S2_TRANS 6 +#define STRTAB_STE_0_CFG_NESTED 7 #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4) #define STRTAB_STE_0_S1FMT_LINEAR 0 @@ -295,6 +296,15 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4) +/* These bits can be controlled by userspace for STRTAB_STE_0_CFG_NESTED */ +#define STRTAB_STE_0_NESTING_ALLOWED \ + cpu_to_le64(STRTAB_STE_0_V | STRTAB_STE_0_CFG | STRTAB_STE_0_S1FMT | \ + STRTAB_STE_0_S1CTXPTR_MASK | STRTAB_STE_0_S1CDMAX) +#define STRTAB_STE_1_NESTING_ALLOWED \ + cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | \ + STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | \ + STRTAB_STE_1_S1STALLD) + /* * Context descriptors. * @@ -514,6 +524,7 @@ struct arm_smmu_cmdq_ent { }; } cfgi; + #define CMDQ_OP_TLBI_NH_ALL 0x10 #define CMDQ_OP_TLBI_NH_ASID 0x11 #define CMDQ_OP_TLBI_NH_VA 0x12 #define CMDQ_OP_TLBI_EL2_ALL 0x20 @@ -815,10 +826,18 @@ struct arm_smmu_domain { struct list_head devices; spinlock_t devices_lock; bool enforce_cache_coherency : 1; + bool nest_parent : 1; struct mmu_notifier mmu_notifier; }; +struct arm_smmu_nested_domain { + struct iommu_domain domain; + struct arm_vsmmu *vsmmu; + + __le64 ste[2]; +}; + /* The following are exposed for testing purposes. */ struct arm_smmu_entry_writer_ops; struct arm_smmu_entry_writer { @@ -863,6 +882,12 @@ static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) return container_of(dom, struct arm_smmu_domain, domain); } +static inline struct arm_smmu_nested_domain * +to_smmu_nested_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct arm_smmu_nested_domain, domain); +} + extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock; @@ -909,6 +934,7 @@ struct arm_smmu_attach_state { struct iommu_domain *old_domain; struct arm_smmu_master *master; bool cd_needs_ats; + bool disable_ats; ioasid_t ssid; /* Resulting state */ bool ats_enabled; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 7cb13a29969d..b6baaa1e55b1 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -421,6 +421,26 @@ struct iommu_hwpt_vtd_s1 { __u32 __reserved; }; +/** + * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE + * (IOMMU_HWPT_DATA_ARM_SMMUV3) + * + * @ste: The first two double words of the user space Stream Table Entry for + * the translation. Must be little-endian. + * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec) + * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax + * - word-1: S1DSS, S1CIR, S1COR, S1CSH, S1STALLD + * + * -EIO will be returned if @ste is not legal or contains any non-allowed field. + * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass + * nested domain will translate the same as the nesting parent. The S1 will + * install a Context Descriptor Table pointing at userspace memory translated + * by the nesting parent. + */ +struct iommu_hwpt_arm_smmuv3 { + __aligned_le64 ste[2]; +}; + /** * enum iommu_hwpt_data_type - IOMMU HWPT Data Type * @IOMMU_HWPT_DATA_NONE: no data -- cgit v1.2.3 From 67e4fe3985138325c9b21193be52266750616182 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:54 -0300 Subject: iommu/arm-smmu-v3: Use S2FWB for NESTED domains Force Write Back (FWB) changes how the S2 IOPTE's MemAttr field works. When S2FWB is supported and enabled the IOPTE will force cachable access to IOMMU_CACHE memory when nesting with a S1 and deny cachable access when !IOMMU_CACHE. When using a single stage of translation, a simple S2 domain, it doesn't change things for PCI devices as it is just a different encoding for the existing mapping of the IOMMU protection flags to cachability attributes. For non-PCI it also changes the combining rules when incoming transactions have inconsistent attributes. However, when used with a nested S1, FWB has the effect of preventing the guest from choosing a MemAttr in it's S1 that would cause ordinary DMA to bypass the cache. Consistent with KVM we wish to deny the guest the ability to become incoherent with cached memory the hypervisor believes is cachable so we don't have to flush it. Allow NESTED domains to be created if the SMMU has S2FWB support and use S2FWB for NESTING_PARENTS. This is an additional option to CANWBS. Link: https://patch.msgid.link/r/10-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 7 ++++-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 ++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ drivers/iommu/io-pgtable-arm.c | 27 +++++++++++++++++----- include/linux/io-pgtable.h | 2 ++ 5 files changed, 38 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 91247a2a2d2c..a1c8fcd4797c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -220,9 +220,12 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, * Must support some way to prevent the VM from bypassing the cache * because VFIO currently does not do any cache maintenance. canwbs * indicates the device is fully coherent and no cache maintenance is - * ever required, even for PCI No-Snoop. + * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make + * things non-coherent using the memattr, but No-Snoop behavior is not + * effected. */ - if (!arm_smmu_master_canwbs(master)) + if (!arm_smmu_master_canwbs(master) && + !(smmu->features & ARM_SMMU_FEAT_S2FWB)) return ERR_PTR(-EOPNOTSUPP); vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 53f12b9d78ab..de598d66b5c2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1046,7 +1046,8 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) /* S2 translates */ if (cfg & BIT(1)) { used_bits[1] |= - cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG); + cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS | + STRTAB_STE_1_SHCFG); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | @@ -1654,6 +1655,8 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, FIELD_PREP(STRTAB_STE_1_EATS, ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB) + target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB); if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); @@ -2472,6 +2475,9 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, pgtbl_cfg.oas = smmu->oas; fmt = ARM_64_LPAE_S2; finalise_stage_fn = arm_smmu_domain_finalise_s2; + if ((smmu->features & ARM_SMMU_FEAT_S2FWB) && + (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB; break; default: return -EINVAL; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 3fabe187ea78..5a025d310dbe 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -58,6 +58,7 @@ struct arm_smmu_device; #define IDR1_SIDSIZE GENMASK(5, 0) #define ARM_SMMU_IDR3 0xc +#define IDR3_FWB (1 << 8) #define IDR3_RIL (1 << 10) #define ARM_SMMU_IDR5 0x14 @@ -265,6 +266,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4) #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6) +#define STRTAB_STE_1_S2FWB (1UL << 25) #define STRTAB_STE_1_S1STALLD (1UL << 27) #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28) @@ -740,6 +742,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20) #define ARM_SMMU_FEAT_HA (1 << 21) #define ARM_SMMU_FEAT_HD (1 << 22) +#define ARM_SMMU_FEAT_S2FWB (1 << 23) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 0e67f1721a3d..74f58c6ac30c 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -106,6 +106,18 @@ #define ARM_LPAE_PTE_HAP_FAULT (((arm_lpae_iopte)0) << 6) #define ARM_LPAE_PTE_HAP_READ (((arm_lpae_iopte)1) << 6) #define ARM_LPAE_PTE_HAP_WRITE (((arm_lpae_iopte)2) << 6) +/* + * For !FWB these code to: + * 1111 = Normal outer write back cachable / Inner Write Back Cachable + * Permit S1 to override + * 0101 = Normal Non-cachable / Inner Non-cachable + * 0001 = Device / Device-nGnRE + * For S2FWB these code: + * 0110 Force Normal Write Back + * 0101 Normal* is forced Normal-NC, Device unchanged + * 0001 Force Device-nGnRE + */ +#define ARM_LPAE_PTE_MEMATTR_FWB_WB (((arm_lpae_iopte)0x6) << 2) #define ARM_LPAE_PTE_MEMATTR_OIWB (((arm_lpae_iopte)0xf) << 2) #define ARM_LPAE_PTE_MEMATTR_NC (((arm_lpae_iopte)0x5) << 2) #define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) @@ -458,12 +470,16 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, */ if (data->iop.fmt == ARM_64_LPAE_S2 || data->iop.fmt == ARM_32_LPAE_S2) { - if (prot & IOMMU_MMIO) + if (prot & IOMMU_MMIO) { pte |= ARM_LPAE_PTE_MEMATTR_DEV; - else if (prot & IOMMU_CACHE) - pte |= ARM_LPAE_PTE_MEMATTR_OIWB; - else + } else if (prot & IOMMU_CACHE) { + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_S2FWB) + pte |= ARM_LPAE_PTE_MEMATTR_FWB_WB; + else + pte |= ARM_LPAE_PTE_MEMATTR_OIWB; + } else { pte |= ARM_LPAE_PTE_MEMATTR_NC; + } } else { if (prot & IOMMU_MMIO) pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV @@ -1035,8 +1051,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) struct arm_lpae_io_pgtable *data; typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr; - /* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_S2FWB)) return NULL; data = arm_lpae_alloc_pgtable(cfg); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index b1ecfc3cd5bc..ce86b09ae80f 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -87,6 +87,7 @@ struct io_pgtable_cfg { * attributes set in the TCR for a non-coherent page-table walker. * * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable. + * IO_PGTABLE_QUIRK_ARM_S2FWB: Use the FWB format for the MemAttrs bits */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -95,6 +96,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) + #define IO_PGTABLE_QUIRK_ARM_S2FWB BIT(8) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; -- cgit v1.2.3 From f27298a82ba09a1c8aecee8a209b2a312beac672 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:55 -0300 Subject: iommu/arm-smmu-v3: Allow ATS for IOMMU_DOMAIN_NESTED The EATS flag needs to flow through the vSTE and into the pSTE, and ensure physical ATS is enabled on the PCI device. The physical ATS state must match the VM's idea of EATS as we rely on the VM to issue the ATS invalidation commands. Thus ATS must remain off at the device until EATS on a nesting domain turns it on. Attaching a nesting domain is the point where the invalidation responsibility transfers to userspace. Update the ATS logic to track EATS for nesting domains and flush the ATC whenever the S2 nesting parent changes. Link: https://patch.msgid.link/r/11-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 31 +++++++++++++++++++--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 26 +++++++++++++++--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 4 ++- include/uapi/linux/iommufd.h | 2 +- 4 files changed, 53 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index a1c8fcd4797c..84c8a21c00ae 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -95,8 +95,6 @@ static int arm_smmu_attach_dev_nested(struct iommu_domain *domain, .master = master, .old_domain = iommu_get_domain_for_dev(dev), .ssid = IOMMU_NO_PASID, - /* Currently invalidation of ATC is not supported */ - .disable_ats = true, }; struct arm_smmu_ste ste; int ret; @@ -107,6 +105,15 @@ static int arm_smmu_attach_dev_nested(struct iommu_domain *domain, return -EBUSY; mutex_lock(&arm_smmu_asid_lock); + /* + * The VM has to control the actual ATS state at the PCI device because + * we forward the invalidations directly from the VM. If the VM doesn't + * think ATS is on it will not generate ATC flushes and the ATC will + * become incoherent. Since we can't access the actual virtual PCI ATS + * config bit here base this off the EATS value in the STE. If the EATS + * is set then the VM must generate ATC flushes. + */ + state.disable_ats = !nested_domain->enable_ats; ret = arm_smmu_attach_prepare(&state, domain); if (ret) { mutex_unlock(&arm_smmu_asid_lock); @@ -131,8 +138,10 @@ static const struct iommu_domain_ops arm_smmu_nested_ops = { .free = arm_smmu_domain_nested_free, }; -static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg) +static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg, + bool *enable_ats) { + unsigned int eats; unsigned int cfg; if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) { @@ -149,6 +158,18 @@ static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg) if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS && cfg != STRTAB_STE_0_CFG_S1_TRANS) return -EIO; + + /* + * Only Full ATS or ATS UR is supported + * The EATS field will be set by arm_smmu_make_nested_domain_ste() + */ + eats = FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(arg->ste[1])); + arg->ste[1] &= ~cpu_to_le64(STRTAB_STE_1_EATS); + if (eats != STRTAB_STE_1_EATS_ABT && eats != STRTAB_STE_1_EATS_TRANS) + return -EIO; + + if (cfg == STRTAB_STE_0_CFG_S1_TRANS) + *enable_ats = (eats == STRTAB_STE_1_EATS_TRANS); return 0; } @@ -160,6 +181,7 @@ arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, const u32 SUPPORTED_FLAGS = IOMMU_HWPT_FAULT_ID_VALID; struct arm_smmu_nested_domain *nested_domain; struct iommu_hwpt_arm_smmuv3 arg; + bool enable_ats = false; int ret; /* @@ -175,7 +197,7 @@ arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, if (ret) return ERR_PTR(ret); - ret = arm_smmu_validate_vste(&arg); + ret = arm_smmu_validate_vste(&arg, &enable_ats); if (ret) return ERR_PTR(ret); @@ -185,6 +207,7 @@ arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, nested_domain->domain.type = IOMMU_DOMAIN_NESTED; nested_domain->domain.ops = &arm_smmu_nested_ops; + nested_domain->enable_ats = enable_ats; nested_domain->vsmmu = vsmmu; nested_domain->ste[0] = arg.ste[0]; nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index de598d66b5c2..b47f80224781 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2107,7 +2107,16 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, if (!master->ats_enabled) continue; - arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, &cmd); + if (master_domain->nested_ats_flush) { + /* + * If a S2 used as a nesting parent is changed we have + * no option but to completely flush the ATC. + */ + arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); + } else { + arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, + &cmd); + } for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; @@ -2631,7 +2640,7 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) static struct arm_smmu_master_domain * arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, struct arm_smmu_master *master, - ioasid_t ssid) + ioasid_t ssid, bool nested_ats_flush) { struct arm_smmu_master_domain *master_domain; @@ -2640,7 +2649,8 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) { if (master_domain->master == master && - master_domain->ssid == ssid) + master_domain->ssid == ssid && + master_domain->nested_ats_flush == nested_ats_flush) return master_domain; } return NULL; @@ -2671,13 +2681,18 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, { struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain); struct arm_smmu_master_domain *master_domain; + bool nested_ats_flush = false; unsigned long flags; if (!smmu_domain) return; + if (domain->type == IOMMU_DOMAIN_NESTED) + nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats; + spin_lock_irqsave(&smmu_domain->devices_lock, flags); - master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid); + master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid, + nested_ats_flush); if (master_domain) { list_del(&master_domain->devices_elm); kfree(master_domain); @@ -2744,6 +2759,9 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, return -ENOMEM; master_domain->master = master; master_domain->ssid = state->ssid; + if (new_domain->type == IOMMU_DOMAIN_NESTED) + master_domain->nested_ats_flush = + to_smmu_nested_domain(new_domain)->enable_ats; /* * During prepare we want the current smmu_domain and new diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 5a025d310dbe..01c1d16dc0c8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -305,7 +305,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_1_NESTING_ALLOWED \ cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | \ STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | \ - STRTAB_STE_1_S1STALLD) + STRTAB_STE_1_S1STALLD | STRTAB_STE_1_EATS) /* * Context descriptors. @@ -837,6 +837,7 @@ struct arm_smmu_domain { struct arm_smmu_nested_domain { struct iommu_domain domain; struct arm_vsmmu *vsmmu; + bool enable_ats : 1; __le64 ste[2]; }; @@ -878,6 +879,7 @@ struct arm_smmu_master_domain { struct list_head devices_elm; struct arm_smmu_master *master; ioasid_t ssid; + bool nested_ats_flush : 1; }; static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index b6baaa1e55b1..a66eb0384cd6 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -429,7 +429,7 @@ struct iommu_hwpt_vtd_s1 { * the translation. Must be little-endian. * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec) * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax - * - word-1: S1DSS, S1CIR, S1COR, S1CSH, S1STALLD + * - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD * * -EIO will be returned if @ste is not legal or contains any non-allowed field. * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass -- cgit v1.2.3 From d68beb276ba26cec47350a6d468e967673ee0c56 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:56 -0300 Subject: iommu/arm-smmu-v3: Support IOMMU_HWPT_INVALIDATE using a VIOMMU object Implement the vIOMMU's cache_invalidate op for user space to invalidate the IOTLB entries, Device ATS and CD entries that are cached by hardware. Add struct iommu_viommu_arm_smmuv3_invalidate defining invalidation entries that are simply in the native format of a 128-bit TLBI command. Scan those commands against the permitted command list and fix their VMID/SID fields to match what is stored in the vIOMMU. Link: https://patch.msgid.link/r/12-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Co-developed-by: Eric Auger Signed-off-by: Eric Auger Co-developed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 134 +++++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 + include/uapi/linux/iommufd.h | 24 ++++ 4 files changed, 166 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 84c8a21c00ae..c96cab6521a4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -215,8 +215,134 @@ arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, return &nested_domain->domain; } +static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid) +{ + struct arm_smmu_master *master; + struct device *dev; + int ret = 0; + + xa_lock(&vsmmu->core.vdevs); + dev = iommufd_viommu_find_dev(&vsmmu->core, (unsigned long)vsid); + if (!dev) { + ret = -EIO; + goto unlock; + } + master = dev_iommu_priv_get(dev); + + /* At this moment, iommufd only supports PCI device that has one SID */ + if (sid) + *sid = master->streams[0].id; +unlock: + xa_unlock(&vsmmu->core.vdevs); + return ret; +} + +/* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */ +struct arm_vsmmu_invalidation_cmd { + union { + u64 cmd[2]; + struct iommu_viommu_arm_smmuv3_invalidate ucmd; + }; +}; + +/* + * Convert, in place, the raw invalidation command into an internal format that + * can be passed to arm_smmu_cmdq_issue_cmdlist(). Internally commands are + * stored in CPU endian. + * + * Enforce the VMID or SID on the command. + */ +static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu, + struct arm_vsmmu_invalidation_cmd *cmd) +{ + /* Commands are le64 stored in u64 */ + cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]); + cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]); + + switch (cmd->cmd[0] & CMDQ_0_OP) { + case CMDQ_OP_TLBI_NSNH_ALL: + /* Convert to NH_ALL */ + cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL | + FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid); + cmd->cmd[1] = 0; + break; + case CMDQ_OP_TLBI_NH_VA: + case CMDQ_OP_TLBI_NH_VAA: + case CMDQ_OP_TLBI_NH_ALL: + case CMDQ_OP_TLBI_NH_ASID: + cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID; + cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid); + break; + case CMDQ_OP_ATC_INV: + case CMDQ_OP_CFGI_CD: + case CMDQ_OP_CFGI_CD_ALL: { + u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]); + + if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid)) + return -EIO; + cmd->cmd[0] &= ~CMDQ_CFGI_0_SID; + cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid); + break; + } + default: + return -EIO; + } + return 0; +} + +static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu, + struct iommu_user_data_array *array) +{ + struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core); + struct arm_smmu_device *smmu = vsmmu->smmu; + struct arm_vsmmu_invalidation_cmd *last; + struct arm_vsmmu_invalidation_cmd *cmds; + struct arm_vsmmu_invalidation_cmd *cur; + struct arm_vsmmu_invalidation_cmd *end; + int ret; + + cmds = kcalloc(array->entry_num, sizeof(*cmds), GFP_KERNEL); + if (!cmds) + return -ENOMEM; + cur = cmds; + end = cmds + array->entry_num; + + static_assert(sizeof(*cmds) == 2 * sizeof(u64)); + ret = iommu_copy_struct_from_full_user_array( + cmds, sizeof(*cmds), array, + IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3); + if (ret) + goto out; + + last = cmds; + while (cur != end) { + ret = arm_vsmmu_convert_user_cmd(vsmmu, cur); + if (ret) + goto out; + + /* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */ + cur++; + if (cur != end && (cur - last) != CMDQ_BATCH_ENTRIES - 1) + continue; + + /* FIXME always uses the main cmdq rather than trying to group by type */ + ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd, + cur - last, true); + if (ret) { + cur--; + goto out; + } + last = cur; + } +out: + array->entry_num = cur - cmds; + kfree(cmds); + return ret; +} + static const struct iommufd_viommu_ops arm_vsmmu_ops = { .alloc_domain_nested = arm_vsmmu_alloc_domain_nested, + .cache_invalidate = arm_vsmmu_cache_invalidate, }; struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, @@ -239,6 +365,14 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, if (s2_parent->smmu != master->smmu) return ERR_PTR(-EINVAL); + /* + * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW + * defect is needed to determine if arm_vsmmu_cache_invalidate() needs + * any change to remove this. + */ + if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) + return ERR_PTR(-EOPNOTSUPP); + /* * Must support some way to prevent the VM from bypassing the cache * because VFIO currently does not do any cache maintenance. canwbs diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b47f80224781..2a9f2d1d3ed9 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -766,9 +766,9 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds, * insert their own list of commands then all of the commands from one * CPU will appear before any of the commands from the other CPU. */ -static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, - struct arm_smmu_cmdq *cmdq, - u64 *cmds, int n, bool sync) +int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, u64 *cmds, int n, + bool sync) { u64 cmd_sync[CMDQ_ENT_DWORDS]; u32 prod; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 01c1d16dc0c8..af25f092303f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -529,6 +529,7 @@ struct arm_smmu_cmdq_ent { #define CMDQ_OP_TLBI_NH_ALL 0x10 #define CMDQ_OP_TLBI_NH_ASID 0x11 #define CMDQ_OP_TLBI_NH_VA 0x12 + #define CMDQ_OP_TLBI_NH_VAA 0x13 #define CMDQ_OP_TLBI_EL2_ALL 0x20 #define CMDQ_OP_TLBI_EL2_ASID 0x21 #define CMDQ_OP_TLBI_EL2_VA 0x22 @@ -951,6 +952,10 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state); void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master, const struct arm_smmu_ste *target); +int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, u64 *cmds, int n, + bool sync); + #ifdef CONFIG_ARM_SMMU_V3_SVA bool arm_smmu_sva_supported(struct arm_smmu_device *smmu); bool arm_smmu_master_sva_supported(struct arm_smmu_master *master); diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index a66eb0384cd6..747d3d9baa3d 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -713,9 +713,11 @@ struct iommu_hwpt_get_dirty_bitmap { * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation * Data Type * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 + * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3 */ enum iommu_hwpt_invalidate_data_type { IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0, + IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1, }; /** @@ -754,6 +756,28 @@ struct iommu_hwpt_vtd_s1_invalidate { __u32 __reserved; }; +/** + * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cahce invalidation + * (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3) + * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ. + * Must be little-endian. + * + * Supported command list only when passing in a vIOMMU via @hwpt_id: + * CMDQ_OP_TLBI_NSNH_ALL + * CMDQ_OP_TLBI_NH_VA + * CMDQ_OP_TLBI_NH_VAA + * CMDQ_OP_TLBI_NH_ALL + * CMDQ_OP_TLBI_NH_ASID + * CMDQ_OP_ATC_INV + * CMDQ_OP_CFGI_CD + * CMDQ_OP_CFGI_CD_ALL + * + * -EIO will be returned if the command is not supported. + */ +struct iommu_viommu_arm_smmuv3_invalidate { + __aligned_le64 cmd[2]; +}; + /** * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) * @size: sizeof(struct iommu_hwpt_invalidate) -- cgit v1.2.3 From 829ed626499c11c9d11c65e93febc1e0da7cd61b Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 13 Nov 2024 11:51:36 -0800 Subject: iommufd: Add IOMMU_IOAS_CHANGE_PROCESS Add an ioctl that updates all DMA mappings to reflect the current process, Change the mm and transfer locked memory accounting from old to current mm. This will be used for live update, allowing an old process to hand the iommufd device descriptor to a new process. The new process calls the ioctl. IOMMU_IOAS_CHANGE_PROCESS only supports DMA mappings created with IOMMU_IOAS_MAP_FILE, because the kernel metadata for such mappings does not depend on the userland VA of the pages (which is different in the new process). IOMMU_IOAS_CHANGE_PROCESS fails if other types of mappings are present. This is a revised version of code originally provided by Jason. Link: https://patch.msgid.link/r/1731527497-16091-4-git-send-email-steven.sistare@oracle.com Suggested-by: Jason Gunthorpe Signed-off-by: Steve Sistare Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/io_pagetable.h | 1 + drivers/iommu/iommufd/ioas.c | 147 ++++++++++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 1 + drivers/iommu/iommufd/main.c | 2 + include/uapi/linux/iommufd.h | 23 +++++ 5 files changed, 174 insertions(+) (limited to 'include') diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h index f5f20fa639ef..10c928a9a463 100644 --- a/drivers/iommu/iommufd/io_pagetable.h +++ b/drivers/iommu/iommufd/io_pagetable.h @@ -173,6 +173,7 @@ enum { IOPT_PAGES_ACCOUNT_NONE = 0, IOPT_PAGES_ACCOUNT_USER = 1, IOPT_PAGES_ACCOUNT_MM = 2, + IOPT_PAGES_ACCOUNT_MODE_NUM = 3, }; enum iopt_address_type { diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index c82ed5a92e3b..1542c5fd10a8 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -439,6 +439,153 @@ static int iommufd_take_all_iova_rwsem(struct iommufd_ctx *ictx, return 0; } +static bool need_charge_update(struct iopt_pages *pages) +{ + switch (pages->account_mode) { + case IOPT_PAGES_ACCOUNT_NONE: + return false; + case IOPT_PAGES_ACCOUNT_MM: + return pages->source_mm != current->mm; + case IOPT_PAGES_ACCOUNT_USER: + /* + * Update when mm changes because it also accounts + * in mm->pinned_vm. + */ + return (pages->source_user != current_user()) || + (pages->source_mm != current->mm); + } + return true; +} + +static int charge_current(unsigned long *npinned) +{ + struct iopt_pages tmp = { + .source_mm = current->mm, + .source_task = current->group_leader, + .source_user = current_user(), + }; + unsigned int account_mode; + int rc; + + for (account_mode = 0; account_mode != IOPT_PAGES_ACCOUNT_MODE_NUM; + account_mode++) { + if (!npinned[account_mode]) + continue; + + tmp.account_mode = account_mode; + rc = iopt_pages_update_pinned(&tmp, npinned[account_mode], true, + NULL); + if (rc) + goto err_undo; + } + return 0; + +err_undo: + while (account_mode != 0) { + account_mode--; + if (!npinned[account_mode]) + continue; + tmp.account_mode = account_mode; + iopt_pages_update_pinned(&tmp, npinned[account_mode], false, + NULL); + } + return rc; +} + +static void change_mm(struct iopt_pages *pages) +{ + struct task_struct *old_task = pages->source_task; + struct user_struct *old_user = pages->source_user; + struct mm_struct *old_mm = pages->source_mm; + + pages->source_mm = current->mm; + mmgrab(pages->source_mm); + mmdrop(old_mm); + + pages->source_task = current->group_leader; + get_task_struct(pages->source_task); + put_task_struct(old_task); + + pages->source_user = get_uid(current_user()); + free_uid(old_user); +} + +#define for_each_ioas_area(_xa, _index, _ioas, _area) \ + xa_for_each((_xa), (_index), (_ioas)) \ + for (_area = iopt_area_iter_first(&_ioas->iopt, 0, ULONG_MAX); \ + _area; \ + _area = iopt_area_iter_next(_area, 0, ULONG_MAX)) + +int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd) +{ + struct iommu_ioas_change_process *cmd = ucmd->cmd; + struct iommufd_ctx *ictx = ucmd->ictx; + unsigned long all_npinned[IOPT_PAGES_ACCOUNT_MODE_NUM] = {}; + struct iommufd_ioas *ioas; + struct iopt_area *area; + struct iopt_pages *pages; + struct xarray ioas_list; + unsigned long index; + int rc; + + if (cmd->__reserved) + return -EOPNOTSUPP; + + xa_init(&ioas_list); + rc = iommufd_take_all_iova_rwsem(ictx, &ioas_list); + if (rc) + return rc; + + for_each_ioas_area(&ioas_list, index, ioas, area) { + if (area->pages->type != IOPT_ADDRESS_FILE) { + rc = -EINVAL; + goto out; + } + } + + /* + * Count last_pinned pages, then clear it to avoid double counting + * if the same iopt_pages is visited multiple times in this loop. + * Since we are under all the locks, npinned == last_npinned, so we + * can easily restore last_npinned before we return. + */ + for_each_ioas_area(&ioas_list, index, ioas, area) { + pages = area->pages; + + if (need_charge_update(pages)) { + all_npinned[pages->account_mode] += pages->last_npinned; + pages->last_npinned = 0; + } + } + + rc = charge_current(all_npinned); + + if (rc) { + /* Charge failed. Fix last_npinned and bail. */ + for_each_ioas_area(&ioas_list, index, ioas, area) + area->pages->last_npinned = area->pages->npinned; + goto out; + } + + for_each_ioas_area(&ioas_list, index, ioas, area) { + pages = area->pages; + + /* Uncharge the old one (which also restores last_npinned) */ + if (need_charge_update(pages)) { + int r = iopt_pages_update_pinned(pages, pages->npinned, + false, NULL); + + if (WARN_ON(r)) + rc = r; + } + change_mm(pages); + } + +out: + iommufd_release_all_iova_rwsem(ictx, &ioas_list); + return rc; +} + int iommufd_option_rlimit_mode(struct iommu_option *cmd, struct iommufd_ctx *ictx) { diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 57c0c8f0f6a5..b6d706cf2c66 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -255,6 +255,7 @@ int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd); int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd); int iommufd_ioas_map(struct iommufd_ucmd *ucmd); int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd); +int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd); int iommufd_ioas_copy(struct iommufd_ucmd *ucmd); int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd); int iommufd_ioas_option(struct iommufd_ucmd *ucmd); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 13ac2286035e..0a96cc8f27da 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -349,6 +349,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { struct iommu_ioas_alloc, out_ioas_id), IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas, struct iommu_ioas_allow_iovas, allowed_iovas), + IOCTL_OP(IOMMU_IOAS_CHANGE_PROCESS, iommufd_ioas_change_process, + struct iommu_ioas_change_process, __reserved), IOCTL_OP(IOMMU_IOAS_COPY, iommufd_ioas_copy, struct iommu_ioas_copy, src_iova), IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges, diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 747d3d9baa3d..4ae8b1ee0444 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -54,6 +54,7 @@ enum { IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, + IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, }; /** @@ -972,4 +973,26 @@ struct iommu_vdevice_alloc { __aligned_u64 virt_id; }; #define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC) + +/** + * struct iommu_ioas_change_process - ioctl(VFIO_IOAS_CHANGE_PROCESS) + * @size: sizeof(struct iommu_ioas_change_process) + * @__reserved: Must be 0 + * + * This transfers pinned memory counts for every memory map in every IOAS + * in the context to the current process. This only supports maps created + * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present. + * If the ioctl returns a failure status, then nothing is changed. + * + * This API is useful for transferring operation of a device from one process + * to another, such as during userland live update. + */ +struct iommu_ioas_change_process { + __u32 size; + __u32 __reserved; +}; + +#define IOMMU_IOAS_CHANGE_PROCESS \ + _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS) + #endif -- cgit v1.2.3