From ea92128fe7f6eef6ee5fcaaed521b1b2b5ab7c9a Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 13 Jun 2025 23:35:13 -0700 Subject: iommufd: Apply obvious cosmetic fixes Run clang-format but exclude those not so obvious ones, which leaves us: - Align indentations - Add missing spaces - Remove unnecessary spaces - Remove unnecessary line wrappings Link: https://patch.msgid.link/r/9132e1ab45690ab1959c66bbb51ac5536a635388.1749882255.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 34b6e6ca4bfa..498c9a768506 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -171,8 +171,9 @@ static inline void iommufd_access_unpin_pages(struct iommufd_access *access, { } -static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, - void *data, size_t len, unsigned int flags) +static inline int iommufd_access_rw(struct iommufd_access *access, + unsigned long iova, void *data, size_t len, + unsigned int flags) { return -EOPNOTSUPP; } -- cgit v1.2.3 From fc9c40e3a4faa09dbd643ae1bdaf8ad006c3bc28 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 13 Jun 2025 23:35:15 -0700 Subject: iommufd: Use enum iommu_viommu_type for type in struct iommufd_viommu Replace unsigned int, to make it clear. No functional changes. The viommu_alloc iommu op will be deprecated, so don't change that. Link: https://patch.msgid.link/r/6c6ba5c0cd381594f17ae74355872d78d7a022c0.1749882255.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Pranjal Shrivastava Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 498c9a768506..ac98e49e44fe 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -101,7 +101,7 @@ struct iommufd_viommu { struct list_head veventqs; struct rw_semaphore veventqs_rwsem; - unsigned int type; + enum iommu_viommu_type type; }; /** -- cgit v1.2.3 From 187f146d5de65d50d90a4f49157d381d8ae32939 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 13 Jun 2025 23:35:18 -0700 Subject: iommu: Introduce get_viommu_size and viommu_init ops So far, a vIOMMU object has been allocated by IOMMU driver and initialized with the driver-level structure, before it returns to the iommufd core for core-level structure initialization. It has been requiring iommufd core to expose some core structure/helpers in its driver.c file, which result in a size increase of this driver module. Meanwhile, IOMMU drivers are now requiring more vIOMMU-base structures for some advanced feature, such as the existing vDEVICE and a future HW_QUEUE. Initializing a core-structure later than driver-structure gives for-driver helpers some trouble, when they are used by IOMMU driver assuming that the new structure (including core) are fully initialized, for example: core: viommu = ops->viommu_alloc(); driver: // my_viommu is successfully allocated driver: my_viommu = iommufd_viommu_alloc(...); driver: // This may crash if it reads viommu->ictx driver: new = iommufd_new_viommu_helper(my_viommu->core ...); core: viommu->ictx = ucmd->ictx; core: ... To ease such a condition, allow the IOMMU driver to report the size of its vIOMMU structure, let the core allocate a vIOMMU object and initialize the core-level structure first, and then hand it over the driver to initialize its driver-level structure. Thus, this requires two new iommu ops, get_viommu_size and viommu_init, so iommufd core can communicate with drivers to replace the viommu_alloc op: core: viommu = ops->get_viommu_size(); driver: return VIOMMU_STRUCT_SIZE(); core: viommu->ictx = ucmd->ictx; // and others core: rc = ops->viommu_init(); driver: // This is safe now as viommu->ictx is inited driver: new = iommufd_new_viommu_helper(my_viommu->core ...); core: ... This also adds a VIOMMU_STRUCT_SIZE macro, for drivers to use, which would statically sanitize the driver structure. Link: https://patch.msgid.link/r/3ab52c5b622dad476c43b1b1f1636c8b902f1692.1749882255.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Pranjal Shrivastava Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 15 +++++++++++++++ include/linux/iommufd.h | 6 ++++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 156732807994..9be4ff370f1e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -14,6 +14,7 @@ #include #include #include +#include #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) @@ -596,6 +597,16 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting * @default_domain_ops: the default ops for domains + * @get_viommu_size: Get the size of a driver-level vIOMMU structure for a given + * @dev corresponding to @viommu_type. Driver should return 0 + * if vIOMMU isn't supported accordingly. It is required for + * driver to use the VIOMMU_STRUCT_SIZE macro to sanitize the + * driver-level vIOMMU structure related to the core one + * @viommu_init: Init the driver-level struct of an iommufd_viommu on a physical + * IOMMU instance @viommu->iommu_dev, as the set of virtualization + * resources shared/passed to user space IOMMU instance. Associate + * it with a nesting @parent_domain. It is required for driver to + * set @viommu->ops pointing to its own viommu_ops * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind * the @dev, as the set of virtualization resources shared/passed * to user space IOMMU instance. And associate it with a nesting @@ -654,6 +665,10 @@ struct iommu_ops { int (*def_domain_type)(struct device *dev); + size_t (*get_viommu_size)(struct device *dev, + enum iommu_viommu_type viommu_type); + int (*viommu_init)(struct iommufd_viommu *viommu, + struct iommu_domain *parent_domain); struct iommufd_viommu *(*viommu_alloc)( struct device *dev, struct iommu_domain *parent_domain, struct iommufd_ctx *ictx, unsigned int viommu_type); diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index ac98e49e44fe..423e08963d90 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -229,6 +229,12 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, } #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ +#define VIOMMU_STRUCT_SIZE(drv_struct, member) \ + (sizeof(drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ + BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_viommu, \ + ((drv_struct *)NULL)->member))) + /* * Helpers for IOMMU driver to allocate driver structures that will be freed by * the iommufd core. The free op will be called prior to freeing the memory. -- cgit v1.2.3 From f842ea208e43066c43e5e91e20fe8ce600df7055 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 13 Jun 2025 23:35:23 -0700 Subject: iommu: Deprecate viommu_alloc op To ease the for-driver iommufd APIs, get_viommu_size and viommu_init ops are introduced. Now, those existing vIOMMU supported drivers implemented these two ops, replacing the viommu_alloc one. So, there is no use of it. Remove it from the headers and the viommu core. Link: https://patch.msgid.link/r/5b32d4499d7ed02a63e57a293c11b642d226ef8d.1749882255.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Pranjal Shrivastava Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/viommu.c | 20 +++++--------------- include/linux/iommu.h | 11 ----------- include/linux/iommufd.h | 18 ------------------ 3 files changed, 5 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 27a39f524840..044e3ef06e0f 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -33,8 +33,6 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) ops = dev_iommu_ops(idev->dev); if (!ops->get_viommu_size || !ops->viommu_init) { - if (ops->viommu_alloc) - goto get_hwpt_paging; rc = -EOPNOTSUPP; goto out_put_idev; } @@ -54,7 +52,6 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_put_idev; } -get_hwpt_paging: hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id); if (IS_ERR(hwpt_paging)) { rc = PTR_ERR(hwpt_paging); @@ -66,13 +63,8 @@ get_hwpt_paging: goto out_put_hwpt; } - if (ops->viommu_alloc) - viommu = ops->viommu_alloc(idev->dev, - hwpt_paging->common.domain, - ucmd->ictx, cmd->type); - else - viommu = (struct iommufd_viommu *)_iommufd_object_alloc( - ucmd->ictx, viommu_size, IOMMUFD_OBJ_VIOMMU); + viommu = (struct iommufd_viommu *)_iommufd_object_alloc( + ucmd->ictx, viommu_size, IOMMUFD_OBJ_VIOMMU); if (IS_ERR(viommu)) { rc = PTR_ERR(viommu); goto out_put_hwpt; @@ -92,11 +84,9 @@ get_hwpt_paging: */ viommu->iommu_dev = __iommu_get_iommu_dev(idev->dev); - if (!ops->viommu_alloc) { - rc = ops->viommu_init(viommu, hwpt_paging->common.domain); - if (rc) - goto out_abort; - } + rc = ops->viommu_init(viommu, hwpt_paging->common.domain); + if (rc) + goto out_abort; /* It is a driver bug that viommu->ops isn't filled */ if (WARN_ON_ONCE(!viommu->ops)) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9be4ff370f1e..04548b18df28 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -607,14 +607,6 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * resources shared/passed to user space IOMMU instance. Associate * it with a nesting @parent_domain. It is required for driver to * set @viommu->ops pointing to its own viommu_ops - * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind - * the @dev, as the set of virtualization resources shared/passed - * to user space IOMMU instance. And associate it with a nesting - * @parent_domain. The @viommu_type must be defined in the header - * include/uapi/linux/iommufd.h - * It is required to call iommufd_viommu_alloc() helper for - * a bundled allocation of the core and the driver structures, - * using the given @ictx pointer. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops * @identity_domain: An always available, always attachable identity @@ -669,9 +661,6 @@ struct iommu_ops { enum iommu_viommu_type viommu_type); int (*viommu_init)(struct iommufd_viommu *viommu, struct iommu_domain *parent_domain); - struct iommufd_viommu *(*viommu_alloc)( - struct device *dev, struct iommu_domain *parent_domain, - struct iommufd_ctx *ictx, unsigned int viommu_type); const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 423e08963d90..bf41b242b9f6 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -234,22 +234,4 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_viommu, \ ((drv_struct *)NULL)->member))) - -/* - * Helpers for IOMMU driver to allocate driver structures that will be freed by - * the iommufd core. The free op will be called prior to freeing the memory. - */ -#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops) \ - ({ \ - drv_struct *ret; \ - \ - static_assert(__same_type(struct iommufd_viommu, \ - ((drv_struct *)NULL)->member)); \ - static_assert(offsetof(drv_struct, member.obj) == 0); \ - ret = (drv_struct *)_iommufd_object_alloc( \ - ictx, sizeof(drv_struct), IOMMUFD_OBJ_VIOMMU); \ - if (!IS_ERR(ret)) \ - ret->member.ops = viommu_ops; \ - ret; \ - }) #endif -- cgit v1.2.3 From 17a93473a552fc0ffdfb04e69a26946afd4a046a Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 13 Jun 2025 23:35:24 -0700 Subject: iommufd: Move _iommufd_object_alloc out of driver.c Now, all driver structures will be allocated by the core, i.e. no longer a need of driver calling _iommufd_object_alloc. Thus, move it back. Before: text data bss dec hex filename 3024 180 0 3204 c84 drivers/iommu/iommufd/driver.o 9074 610 64 9748 2614 drivers/iommu/iommufd/main.o After: text data bss dec hex filename 2665 164 0 2829 b0d drivers/iommu/iommufd/driver.o 9410 618 64 10092 276c drivers/iommu/iommufd/main.o Link: https://patch.msgid.link/r/79e630c7b911930cf36e3c8a775a04e66c528d65.1749882255.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/driver.c | 33 --------------------------------- drivers/iommu/iommufd/iommufd_private.h | 4 ++++ drivers/iommu/iommufd/main.c | 32 ++++++++++++++++++++++++++++++++ include/linux/iommufd.h | 10 ---------- 4 files changed, 36 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index 922cd1fe7ec2..2fee399a148e 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -3,39 +3,6 @@ */ #include "iommufd_private.h" -struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, - size_t size, - enum iommufd_object_type type) -{ - struct iommufd_object *obj; - int rc; - - obj = kzalloc(size, GFP_KERNEL_ACCOUNT); - if (!obj) - return ERR_PTR(-ENOMEM); - obj->type = type; - /* Starts out bias'd by 1 until it is removed from the xarray */ - refcount_set(&obj->shortterm_users, 1); - refcount_set(&obj->users, 1); - - /* - * Reserve an ID in the xarray but do not publish the pointer yet since - * the caller hasn't initialized it yet. Once the pointer is published - * in the xarray and visible to other threads we can't reliably destroy - * it anymore, so the caller must complete all errorable operations - * before calling iommufd_object_finalize(). - */ - rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b, - GFP_KERNEL_ACCOUNT); - if (rc) - goto out_free; - return obj; -out_free: - kfree(obj); - return ERR_PTR(rc); -} -EXPORT_SYMBOL_NS_GPL(_iommufd_object_alloc, "IOMMUFD"); - /* Caller should xa_lock(&viommu->vdevs) to protect the return value */ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 32f0631368e1..ec5b499d139c 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -230,6 +230,10 @@ iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, iommufd_object_remove(ictx, obj, obj->id, 0); } +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type); + #define __iommufd_object_alloc(ictx, ptr, type, obj) \ container_of(_iommufd_object_alloc( \ ictx, \ diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 347c56ef44d8..85ad2853da0b 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -29,6 +29,38 @@ struct iommufd_object_ops { static const struct iommufd_object_ops iommufd_object_ops[]; static struct miscdevice vfio_misc_dev; +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type) +{ + struct iommufd_object *obj; + int rc; + + obj = kzalloc(size, GFP_KERNEL_ACCOUNT); + if (!obj) + return ERR_PTR(-ENOMEM); + obj->type = type; + /* Starts out bias'd by 1 until it is removed from the xarray */ + refcount_set(&obj->shortterm_users, 1); + refcount_set(&obj->users, 1); + + /* + * Reserve an ID in the xarray but do not publish the pointer yet since + * the caller hasn't initialized it yet. Once the pointer is published + * in the xarray and visible to other threads we can't reliably destroy + * it anymore, so the caller must complete all errorable operations + * before calling iommufd_object_finalize(). + */ + rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b, + GFP_KERNEL_ACCOUNT); + if (rc) + goto out_free; + return obj; +out_free: + kfree(obj); + return ERR_PTR(rc); +} + /* * Allow concurrent access to the object. * diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index bf41b242b9f6..2d1bf2f97ee3 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -190,9 +190,6 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) #endif /* CONFIG_IOMMUFD */ #if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) -struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, - size_t size, - enum iommufd_object_type type); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, @@ -201,13 +198,6 @@ int iommufd_viommu_report_event(struct iommufd_viommu *viommu, enum iommu_veventq_type type, void *event_data, size_t data_len); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ -static inline struct iommufd_object * -_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, - enum iommufd_object_type type) -{ - return ERR_PTR(-EOPNOTSUPP); -} - static inline struct device * iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { -- cgit v1.2.3 From 4b57c057f9e6668ae442b19902dab8a73fe7b209 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:58:56 -0700 Subject: iommu: Use enum iommu_hw_info_type for type in hw_info op Replace u32 to make it clear. No functional changes. Also simplify the kdoc since the type itself is clear enough. Link: https://patch.msgid.link/r/651c50dee8ab900f691202ef0204cd5a43fdd6a2.1752126748.git.nicolinc@nvidia.com Reviewed-by: Pranjal Shrivastava Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 3 ++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 ++- drivers/iommu/intel/iommu.c | 3 ++- drivers/iommu/iommufd/selftest.c | 3 ++- include/linux/iommu.h | 6 +++--- 5 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 9f59c95a254c..69bbe39e28de 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -7,7 +7,8 @@ #include "arm-smmu-v3.h" -void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type) +void *arm_smmu_hw_info(struct device *dev, u32 *length, + enum iommu_hw_info_type *type) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct iommu_hw_info_arm_smmuv3 *info; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index bb39af84e6b0..04463a4aaa26 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -1033,7 +1033,8 @@ struct arm_vsmmu { }; #if IS_ENABLED(CONFIG_ARM_SMMU_V3_IOMMUFD) -void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type); +void *arm_smmu_hw_info(struct device *dev, u32 *length, + enum iommu_hw_info_type *type); size_t arm_smmu_get_viommu_size(struct device *dev, enum iommu_viommu_type viommu_type); int arm_vsmmu_init(struct iommufd_viommu *viommu, diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7aa3932251b2..850f1a6f548c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4091,7 +4091,8 @@ out_remove_dev_pasid: return ret; } -static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type) +static void *intel_iommu_hw_info(struct device *dev, u32 *length, + enum iommu_hw_info_type *type) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 74ca955a766e..7a9abe3f47d5 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -287,7 +287,8 @@ static struct iommu_domain mock_blocking_domain = { .ops = &mock_blocking_ops, }; -static void *mock_domain_hw_info(struct device *dev, u32 *length, u32 *type) +static void *mock_domain_hw_info(struct device *dev, u32 *length, + enum iommu_hw_info_type *type) { struct iommu_test_hw_info *info; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 04548b18df28..b87c2841e6bc 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -563,8 +563,7 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * @capable: check capability * @hw_info: report iommu hardware information. The data buffer returned by this * op is allocated in the iommu driver and freed by the caller after - * use. The information type is one of enum iommu_hw_info_type defined - * in include/uapi/linux/iommufd.h. + * use. * @domain_alloc: Do not use in new drivers * @domain_alloc_identity: allocate an IDENTITY domain. Drivers should prefer to * use identity_domain instead. This should only be used @@ -623,7 +622,8 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); - void *(*hw_info)(struct device *dev, u32 *length, u32 *type); + void *(*hw_info)(struct device *dev, u32 *length, + enum iommu_hw_info_type *type); /* Domain allocation and freeing by the iommu driver */ #if IS_ENABLED(CONFIG_FSL_PAMU) -- cgit v1.2.3 From 3fcf56a2393b399f289a473181ce6b19f716b59d Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:58:57 -0700 Subject: iommu: Add iommu_copy_struct_to_user helper Similar to the iommu_copy_struct_from_user helper receiving data from the user space, add an iommu_copy_struct_to_user helper to report output data back to the user space data pointer. Link: https://patch.msgid.link/r/fa292c2a730aadd77085ec3a8272360c96eabb9c.1752126748.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b87c2841e6bc..fd7319706684 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -558,6 +558,46 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, return 0; } +/** + * __iommu_copy_struct_to_user - Report iommu driver specific user space data + * @dst_data: Pointer to a struct iommu_user_data for user space data location + * @src_data: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @data_type: The data type of the @src_data. Must match with @dst_data.type + * @data_len: Length of current user data structure, i.e. sizeof(struct _src) + * @min_len: Initial length of user data structure for backward compatibility. + * This should be offsetofend using the last member in the user data + * struct that was initially added to include/uapi/linux/iommufd.h + */ +static inline int +__iommu_copy_struct_to_user(const struct iommu_user_data *dst_data, + void *src_data, unsigned int data_type, + size_t data_len, size_t min_len) +{ + if (WARN_ON(!dst_data || !src_data)) + return -EINVAL; + if (dst_data->type != data_type) + return -EINVAL; + if (dst_data->len < min_len || data_len < dst_data->len) + return -EINVAL; + return copy_struct_to_user(dst_data->uptr, dst_data->len, src_data, + data_len, NULL); +} + +/** + * iommu_copy_struct_to_user - Report iommu driver specific user space data + * @user_data: Pointer to a struct iommu_user_data for user space data location + * @ksrc: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @data_type: The data type of the @ksrc. Must match with @user_data->type + * @min_last: The last member of the data structure @ksrc points in the initial + * version. + * Return 0 for success, otherwise -error. + */ +#define iommu_copy_struct_to_user(user_data, ksrc, data_type, min_last) \ + __iommu_copy_struct_to_user(user_data, ksrc, data_type, sizeof(*ksrc), \ + offsetofend(typeof(*ksrc), min_last)) + /** * struct iommu_ops - iommu ops and capabilities * @capable: check capability -- cgit v1.2.3 From c3436d42f812faffac94f8fb3fb246ab43ffdffe Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:58:58 -0700 Subject: iommu: Pass in a driver-level user data structure to viommu_init op The new type of vIOMMU for tegra241-cmdqv allows user space VM to use one of its virtual command queue HW resources exclusively. This requires user space to mmap the corresponding MMIO page from kernel space for direct HW control. To forward the mmap info (offset and length), iommufd should add a driver specific data structure to the IOMMUFD_CMD_VIOMMU_ALLOC ioctl, for driver to output the info during the vIOMMU initialization back to user space. Similar to the existing ioctls and their IOMMU handlers, add a user_data to viommu_init op to bridge between iommufd and drivers. Link: https://patch.msgid.link/r/90bd5637dab7f5507c7a64d2c4826e70431e45a4.1752126748.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Reviewed-by: Kevin Tian Reviewed-by: Vasant Hegde Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 3 ++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 ++- drivers/iommu/iommufd/selftest.c | 3 ++- drivers/iommu/iommufd/viommu.c | 2 +- include/linux/iommu.h | 3 ++- 5 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 69bbe39e28de..170d69162848 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -419,7 +419,8 @@ size_t arm_smmu_get_viommu_size(struct device *dev, } int arm_vsmmu_init(struct iommufd_viommu *viommu, - struct iommu_domain *parent_domain) + struct iommu_domain *parent_domain, + const struct iommu_user_data *user_data) { struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core); struct arm_smmu_device *smmu = diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 04463a4aaa26..c1ced4d4b6d1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -1038,7 +1038,8 @@ void *arm_smmu_hw_info(struct device *dev, u32 *length, size_t arm_smmu_get_viommu_size(struct device *dev, enum iommu_viommu_type viommu_type); int arm_vsmmu_init(struct iommufd_viommu *viommu, - struct iommu_domain *parent_domain); + struct iommu_domain *parent_domain, + const struct iommu_user_data *user_data); int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, struct arm_smmu_nested_domain *nested_domain); void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 7a9abe3f47d5..0d896a89ace7 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -779,7 +779,8 @@ static size_t mock_get_viommu_size(struct device *dev, } static int mock_viommu_init(struct iommufd_viommu *viommu, - struct iommu_domain *parent_domain) + struct iommu_domain *parent_domain, + const struct iommu_user_data *user_data) { struct mock_iommu_device *mock_iommu = container_of( viommu->iommu_dev, struct mock_iommu_device, iommu_dev); diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index bc8796e6684e..2009a421efae 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -84,7 +84,7 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) */ viommu->iommu_dev = __iommu_get_iommu_dev(idev->dev); - rc = ops->viommu_init(viommu, hwpt_paging->common.domain); + rc = ops->viommu_init(viommu, hwpt_paging->common.domain, NULL); if (rc) goto out_put_hwpt; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index fd7319706684..e06a0fbe4bc7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -700,7 +700,8 @@ struct iommu_ops { size_t (*get_viommu_size)(struct device *dev, enum iommu_viommu_type viommu_type); int (*viommu_init)(struct iommufd_viommu *viommu, - struct iommu_domain *parent_domain); + struct iommu_domain *parent_domain, + const struct iommu_user_data *user_data); const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; -- cgit v1.2.3 From ed42eee797ff3dc889ade63c1dd7c4f430699e23 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:04 -0700 Subject: iommufd/viommu: Add driver-defined vDEVICE support NVIDIA VCMDQ driver will have a driver-defined vDEVICE structure and do some HW configurations with that. To allow IOMMU drivers to define their own vDEVICE structures, move the struct iommufd_vdevice to the public header and provide a pair of viommu ops, similar to get_viommu_size and viommu_init. Doing this, however, creates a new window between the vDEVICE allocation and its driver-level initialization, during which an abort could happen but it can't invoke a driver destroy function from the struct viommu_ops since the driver structure isn't initialized yet. vIOMMU object doesn't have this problem, since its destroy op is set via the viommu_ops by the driver viommu_init function. Thus, vDEVICE should do something similar: add a destroy function pointer inside the struct iommufd_vdevice instead of the struct iommufd_viommu_ops. Note that there is unlikely a use case for a type dependent vDEVICE, so a static vdevice_size is probably enough for the near term instead of a get_vdevice_size function op. Link: https://patch.msgid.link/r/1e751c01da7863c669314d8e27fdb89eabcf5605.1752126748.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Pranjal Shrivastava Reviewed-by: Lu Baolu Reviewed-by: Vasant Hegde Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_private.h | 12 ------------ drivers/iommu/iommufd/viommu.c | 26 +++++++++++++++++++++++++- include/linux/iommufd.h | 31 +++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 9fdbf5f21f2e..06b8c2e2d9e6 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -653,18 +653,6 @@ void iommufd_viommu_destroy(struct iommufd_object *obj); int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_vdevice_destroy(struct iommufd_object *obj); -struct iommufd_vdevice { - struct iommufd_object obj; - struct iommufd_viommu *viommu; - struct device *dev; - - /* - * Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID of - * AMD IOMMU, and vRID of Intel VT-d - */ - u64 virt_id; -}; - #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); void iommufd_selftest_destroy(struct iommufd_object *obj); diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index c0365849f849..081ee6697a11 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -116,6 +116,8 @@ void iommufd_vdevice_destroy(struct iommufd_object *obj) container_of(obj, struct iommufd_vdevice, obj); struct iommufd_viommu *viommu = vdev->viommu; + if (vdev->destroy) + vdev->destroy(vdev); /* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */ xa_cmpxchg(&viommu->vdevs, vdev->virt_id, vdev, NULL, GFP_KERNEL); refcount_dec(&viommu->obj.users); @@ -126,6 +128,7 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) { struct iommu_vdevice_alloc *cmd = ucmd->cmd; struct iommufd_vdevice *vdev, *curr; + size_t vdev_size = sizeof(*vdev); struct iommufd_viommu *viommu; struct iommufd_device *idev; u64 virt_id = cmd->virt_id; @@ -150,7 +153,22 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_put_idev; } - vdev = iommufd_object_alloc_ucmd(ucmd, vdev, IOMMUFD_OBJ_VDEVICE); + if (viommu->ops && viommu->ops->vdevice_size) { + /* + * It is a driver bug for: + * - ops->vdevice_size smaller than the core structure size + * - not implementing a pairing ops->vdevice_init op + */ + if (WARN_ON_ONCE(viommu->ops->vdevice_size < vdev_size || + !viommu->ops->vdevice_init)) { + rc = -EOPNOTSUPP; + goto out_put_idev; + } + vdev_size = viommu->ops->vdevice_size; + } + + vdev = (struct iommufd_vdevice *)_iommufd_object_alloc_ucmd( + ucmd, vdev_size, IOMMUFD_OBJ_VDEVICE); if (IS_ERR(vdev)) { rc = PTR_ERR(vdev); goto out_put_idev; @@ -168,6 +186,12 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_put_idev; } + if (viommu->ops && viommu->ops->vdevice_init) { + rc = viommu->ops->vdevice_init(vdev); + if (rc) + goto out_put_idev; + } + cmd->out_vdevice_id = vdev->obj.id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 2d1bf2f97ee3..bdd10a85eeef 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -104,6 +104,21 @@ struct iommufd_viommu { enum iommu_viommu_type type; }; +struct iommufd_vdevice { + struct iommufd_object obj; + struct iommufd_viommu *viommu; + struct device *dev; + + /* + * Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID of + * AMD IOMMU, and vRID of Intel VT-d + */ + u64 virt_id; + + /* Clean up all driver-specific parts of an iommufd_vdevice */ + void (*destroy)(struct iommufd_vdevice *vdev); +}; + /** * struct iommufd_viommu_ops - vIOMMU specific operations * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory @@ -120,6 +135,14 @@ struct iommufd_viommu { * array->entry_num to report the number of handled requests. * The data structure of the array entry must be defined in * include/uapi/linux/iommufd.h + * @vdevice_size: Size of the driver-defined vDEVICE structure per this vIOMMU + * @vdevice_init: Initialize the driver-level structure of a vDEVICE object, or + * related HW procedure. @vdev is already initialized by iommufd + * core: vdev->dev and vdev->viommu pointers; vdev->id carries a + * per-vIOMMU virtual ID (refer to struct iommu_vdevice_alloc in + * include/uapi/linux/iommufd.h) + * If driver has a deinit function to revert what vdevice_init op + * does, it should set it to the @vdev->destroy function pointer */ struct iommufd_viommu_ops { void (*destroy)(struct iommufd_viommu *viommu); @@ -128,6 +151,8 @@ struct iommufd_viommu_ops { const struct iommu_user_data *user_data); int (*cache_invalidate)(struct iommufd_viommu *viommu, struct iommu_user_data_array *array); + const size_t vdevice_size; + int (*vdevice_init)(struct iommufd_vdevice *vdev); }; #if IS_ENABLED(CONFIG_IOMMUFD) @@ -224,4 +249,10 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_viommu, \ ((drv_struct *)NULL)->member))) + +#define VDEVICE_STRUCT_SIZE(drv_struct, member) \ + (sizeof(drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ + BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_vdevice, \ + ((drv_struct *)NULL)->member))) #endif -- cgit v1.2.3 From e2e9360022585c21dc30d2b19f5866c252f40806 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:05 -0700 Subject: iommufd/viommu: Introduce IOMMUFD_OBJ_HW_QUEUE and its related struct Add IOMMUFD_OBJ_HW_QUEUE with an iommufd_hw_queue structure, representing a HW-accelerated queue type of IOMMU's physical queue that can be passed through to a user space VM for direct hardware control, such as: - NVIDIA's Virtual Command Queue - AMD vIOMMU's Command Buffer, Event Log Buffers, and PPR Log Buffers Add new viommu ops for iommufd to communicate with IOMMU drivers to fetch supported HW queue structure size and to forward user space ioctls to the IOMMU drivers for initialization/destroy. As the existing HWs, NVIDIA's VCMDQs access the guest memory via physical addresses, while AMD's Buffers access the guest memory via guest physical addresses (i.e. iova of the nesting parent HWPT). Separate two mutually exclusive hw_queue_init and hw_queue_init_phys ops to indicate whether a vIOMMU HW accesses the guest queue in the guest physical space (via iova) or the host physical space (via pa). In a latter case, the iommufd core will validate the physical pages of a given guest queue, to ensure the underlying physical pages are contiguous and pinned. Since this is introduced with NVIDIA's VCMDQs, add hw_queue_init_phys for now, and leave some notes for hw_queue_init in the near future (for AMD). Either NVIDIA's or AMD's HW is a multi-queue model: NVIDIA's will be only one type in enum iommu_hw_queue_type, while AMD's will be three different types (two of which will have multi queues). Compared to letting the core manage multiple queues with three types per vIOMMU object, it'd be easier for the driver to manage that by having three different driver-structure arrays per vIOMMU object. Thus, pass in the index to the init op. Link: https://patch.msgid.link/r/6939b73699e278e60ce167e911b3d9be68882bad.1752126748.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Reviewed-by: Vasant Hegde Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 42 ++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/iommufd.h | 9 +++++++++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index bdd10a85eeef..f13f3ca6adb5 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -37,6 +37,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_VIOMMU, IOMMUFD_OBJ_VDEVICE, IOMMUFD_OBJ_VEVENTQ, + IOMMUFD_OBJ_HW_QUEUE, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -119,6 +120,19 @@ struct iommufd_vdevice { void (*destroy)(struct iommufd_vdevice *vdev); }; +struct iommufd_hw_queue { + struct iommufd_object obj; + struct iommufd_viommu *viommu; + + u64 base_addr; /* in guest physical address space */ + size_t length; + + enum iommu_hw_queue_type type; + + /* Clean up all driver-specific parts of an iommufd_hw_queue */ + void (*destroy)(struct iommufd_hw_queue *hw_queue); +}; + /** * struct iommufd_viommu_ops - vIOMMU specific operations * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory @@ -143,6 +157,22 @@ struct iommufd_vdevice { * include/uapi/linux/iommufd.h) * If driver has a deinit function to revert what vdevice_init op * does, it should set it to the @vdev->destroy function pointer + * @get_hw_queue_size: Get the size of a driver-defined HW queue structure for a + * given @viommu corresponding to @queue_type. Driver should + * return 0 if HW queue aren't supported accordingly. It is + * required for driver to use the HW_QUEUE_STRUCT_SIZE macro + * to sanitize the driver-level HW queue structure related + * to the core one + * @hw_queue_init_phys: Initialize the driver-level structure of a HW queue that + * is initialized with its core-level structure that holds + * all the info about a guest queue memory. + * Driver providing this op indicates that HW accesses the + * guest queue memory via physical addresses. + * @index carries the logical HW QUEUE ID per vIOMMU in a + * guest VM, for a multi-queue model. @base_addr_pa carries + * the physical location of the guest queue + * If driver has a deinit function to revert what this op + * does, it should set it to the @hw_queue->destroy pointer */ struct iommufd_viommu_ops { void (*destroy)(struct iommufd_viommu *viommu); @@ -153,6 +183,11 @@ struct iommufd_viommu_ops { struct iommu_user_data_array *array); const size_t vdevice_size; int (*vdevice_init)(struct iommufd_vdevice *vdev); + size_t (*get_hw_queue_size)(struct iommufd_viommu *viommu, + enum iommu_hw_queue_type queue_type); + /* AMD's HW will add hw_queue_init simply using @hw_queue->base_addr */ + int (*hw_queue_init_phys)(struct iommufd_hw_queue *hw_queue, u32 index, + phys_addr_t base_addr_pa); }; #if IS_ENABLED(CONFIG_IOMMUFD) @@ -255,4 +290,11 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_vdevice, \ ((drv_struct *)NULL)->member))) + +#define HW_QUEUE_STRUCT_SIZE(drv_struct, member) \ + (sizeof(drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ + BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_hw_queue, \ + ((drv_struct *)NULL)->member))) + #endif diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 04eee77335cf..640a8b5147c2 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -1147,4 +1147,13 @@ struct iommu_veventq_alloc { __u32 __reserved; }; #define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC) + +/** + * enum iommu_hw_queue_type - HW Queue Type + * @IOMMU_HW_QUEUE_TYPE_DEFAULT: Reserved for future use + */ +enum iommu_hw_queue_type { + IOMMU_HW_QUEUE_TYPE_DEFAULT = 0, +}; + #endif -- cgit v1.2.3 From 2238ddc2b0560734c2dabb1c1fb4b342b5193625 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:06 -0700 Subject: iommufd/viommu: Add IOMMUFD_CMD_HW_QUEUE_ALLOC ioctl Introduce a new IOMMUFD_CMD_HW_QUEUE_ALLOC ioctl for user space to allocate a HW QUEUE object for a vIOMMU specific HW-accelerated queue, e.g.: - NVIDIA's Virtual Command Queue - AMD vIOMMU's Command Buffer, Event Log Buffers, and PPR Log Buffers Since this is introduced with NVIDIA's VCMDQs that access the guest memory in the physical address space, add an iommufd_hw_queue_alloc_phys() helper that will create an access object to the queue memory in the IOAS, to avoid the mappings of the guest memory from being unmapped, during the life cycle of the HW queue object. AMD's HW will need an hw_queue_init op that is mutually exclusive with the hw_queue_init_phys op, and their case will bypass the access part, i.e. no iommufd_hw_queue_alloc_phys() call. Link: https://patch.msgid.link/r/dab4ace747deb46c1fe70a5c663307f46990ae56.1752126748.git.nicolinc@nvidia.com Reviewed-by: Pranjal Shrivastava Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_private.h | 2 + drivers/iommu/iommufd/main.c | 6 ++ drivers/iommu/iommufd/viommu.c | 180 ++++++++++++++++++++++++++++++++ include/linux/iommufd.h | 1 + include/uapi/linux/iommufd.h | 34 ++++++ 5 files changed, 223 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 06b8c2e2d9e6..dcd609573244 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -652,6 +652,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_viommu_destroy(struct iommufd_object *obj); int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_vdevice_destroy(struct iommufd_object *obj); +int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd); +void iommufd_hw_queue_destroy(struct iommufd_object *obj); #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 778694d7c207..4e8dbbfac890 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -354,6 +354,7 @@ union ucmd_buffer { struct iommu_destroy destroy; struct iommu_fault_alloc fault; struct iommu_hw_info info; + struct iommu_hw_queue_alloc hw_queue; struct iommu_hwpt_alloc hwpt; struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; struct iommu_hwpt_invalidate cache; @@ -396,6 +397,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { struct iommu_fault_alloc, out_fault_fd), IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info, __reserved), + IOCTL_OP(IOMMU_HW_QUEUE_ALLOC, iommufd_hw_queue_alloc_ioctl, + struct iommu_hw_queue_alloc, length), IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, __reserved), IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap, @@ -559,6 +562,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_FAULT] = { .destroy = iommufd_fault_destroy, }, + [IOMMUFD_OBJ_HW_QUEUE] = { + .destroy = iommufd_hw_queue_destroy, + }, [IOMMUFD_OBJ_HWPT_PAGING] = { .destroy = iommufd_hwpt_paging_destroy, .abort = iommufd_hwpt_paging_abort, diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 081ee6697a11..91339f799916 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -201,3 +201,183 @@ out_put_viommu: iommufd_put_object(ucmd->ictx, &viommu->obj); return rc; } + +static void iommufd_hw_queue_destroy_access(struct iommufd_ctx *ictx, + struct iommufd_access *access, + u64 base_iova, size_t length) +{ + u64 aligned_iova = PAGE_ALIGN_DOWN(base_iova); + u64 offset = base_iova - aligned_iova; + + iommufd_access_unpin_pages(access, aligned_iova, + PAGE_ALIGN(length + offset)); + iommufd_access_detach_internal(access); + iommufd_access_destroy_internal(ictx, access); +} + +void iommufd_hw_queue_destroy(struct iommufd_object *obj) +{ + struct iommufd_hw_queue *hw_queue = + container_of(obj, struct iommufd_hw_queue, obj); + + if (hw_queue->destroy) + hw_queue->destroy(hw_queue); + if (hw_queue->access) + iommufd_hw_queue_destroy_access(hw_queue->viommu->ictx, + hw_queue->access, + hw_queue->base_addr, + hw_queue->length); + if (hw_queue->viommu) + refcount_dec(&hw_queue->viommu->obj.users); +} + +/* + * When the HW accesses the guest queue via physical addresses, the underlying + * physical pages of the guest queue must be contiguous. Also, for the security + * concern that IOMMUFD_CMD_IOAS_UNMAP could potentially remove the mappings of + * the guest queue from the nesting parent iopt while the HW is still accessing + * the guest queue memory physically, such a HW queue must require an access to + * pin the underlying pages and prevent that from happening. + */ +static struct iommufd_access * +iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd, + struct iommufd_viommu *viommu, phys_addr_t *base_pa) +{ + u64 aligned_iova = PAGE_ALIGN_DOWN(cmd->nesting_parent_iova); + u64 offset = cmd->nesting_parent_iova - aligned_iova; + struct iommufd_access *access; + struct page **pages; + size_t max_npages; + size_t length; + size_t i; + int rc; + + /* max_npages = DIV_ROUND_UP(offset + cmd->length, PAGE_SIZE) */ + if (check_add_overflow(offset, cmd->length, &length)) + return ERR_PTR(-ERANGE); + if (check_add_overflow(length, PAGE_SIZE - 1, &length)) + return ERR_PTR(-ERANGE); + max_npages = length / PAGE_SIZE; + /* length needs to be page aligned too */ + length = max_npages * PAGE_SIZE; + + /* + * Use kvcalloc() to avoid memory fragmentation for a large page array. + * Set __GFP_NOWARN to avoid syzkaller blowups + */ + pages = kvcalloc(max_npages, sizeof(*pages), GFP_KERNEL | __GFP_NOWARN); + if (!pages) + return ERR_PTR(-ENOMEM); + + access = iommufd_access_create_internal(viommu->ictx); + if (IS_ERR(access)) { + rc = PTR_ERR(access); + goto out_free; + } + + rc = iommufd_access_attach_internal(access, viommu->hwpt->ioas); + if (rc) + goto out_destroy; + + rc = iommufd_access_pin_pages(access, aligned_iova, length, pages, 0); + if (rc) + goto out_detach; + + /* Validate if the underlying physical pages are contiguous */ + for (i = 1; i < max_npages; i++) { + if (page_to_pfn(pages[i]) == page_to_pfn(pages[i - 1]) + 1) + continue; + rc = -EFAULT; + goto out_unpin; + } + + *base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset; + kfree(pages); + return access; + +out_unpin: + iommufd_access_unpin_pages(access, aligned_iova, length); +out_detach: + iommufd_access_detach_internal(access); +out_destroy: + iommufd_access_destroy_internal(viommu->ictx, access); +out_free: + kfree(pages); + return ERR_PTR(rc); +} + +int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd) +{ + struct iommu_hw_queue_alloc *cmd = ucmd->cmd; + struct iommufd_hw_queue *hw_queue; + struct iommufd_viommu *viommu; + struct iommufd_access *access; + size_t hw_queue_size; + phys_addr_t base_pa; + u64 last; + int rc; + + if (cmd->flags || cmd->type == IOMMU_HW_QUEUE_TYPE_DEFAULT) + return -EOPNOTSUPP; + if (!cmd->length) + return -EINVAL; + if (check_add_overflow(cmd->nesting_parent_iova, cmd->length - 1, + &last)) + return -EOVERFLOW; + + viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); + if (IS_ERR(viommu)) + return PTR_ERR(viommu); + + if (!viommu->ops || !viommu->ops->get_hw_queue_size || + !viommu->ops->hw_queue_init_phys) { + rc = -EOPNOTSUPP; + goto out_put_viommu; + } + + hw_queue_size = viommu->ops->get_hw_queue_size(viommu, cmd->type); + if (!hw_queue_size) { + rc = -EOPNOTSUPP; + goto out_put_viommu; + } + + /* + * It is a driver bug for providing a hw_queue_size smaller than the + * core HW queue structure size + */ + if (WARN_ON_ONCE(hw_queue_size < sizeof(*hw_queue))) { + rc = -EOPNOTSUPP; + goto out_put_viommu; + } + + hw_queue = (struct iommufd_hw_queue *)_iommufd_object_alloc_ucmd( + ucmd, hw_queue_size, IOMMUFD_OBJ_HW_QUEUE); + if (IS_ERR(hw_queue)) { + rc = PTR_ERR(hw_queue); + goto out_put_viommu; + } + + access = iommufd_hw_queue_alloc_phys(cmd, viommu, &base_pa); + if (IS_ERR(access)) { + rc = PTR_ERR(access); + goto out_put_viommu; + } + + hw_queue->viommu = viommu; + refcount_inc(&viommu->obj.users); + hw_queue->access = access; + hw_queue->type = cmd->type; + hw_queue->length = cmd->length; + hw_queue->base_addr = cmd->nesting_parent_iova; + + rc = viommu->ops->hw_queue_init_phys(hw_queue, cmd->index, base_pa); + if (rc) + goto out_put_viommu; + + cmd->out_hw_queue_id = hw_queue->obj.id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + +out_put_viommu: + iommufd_put_object(ucmd->ictx, &viommu->obj); + return rc; +} diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index f13f3ca6adb5..ce4011a2fc27 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -123,6 +123,7 @@ struct iommufd_vdevice { struct iommufd_hw_queue { struct iommufd_object obj; struct iommufd_viommu *viommu; + struct iommufd_access *access; u64 base_addr; /* in guest physical address space */ size_t length; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 640a8b5147c2..b928c1ed2395 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -56,6 +56,7 @@ enum { IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93, + IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94, }; /** @@ -1156,4 +1157,37 @@ enum iommu_hw_queue_type { IOMMU_HW_QUEUE_TYPE_DEFAULT = 0, }; +/** + * struct iommu_hw_queue_alloc - ioctl(IOMMU_HW_QUEUE_ALLOC) + * @size: sizeof(struct iommu_hw_queue_alloc) + * @flags: Must be 0 + * @viommu_id: Virtual IOMMU ID to associate the HW queue with + * @type: One of enum iommu_hw_queue_type + * @index: The logical index to the HW queue per virtual IOMMU for a multi-queue + * model + * @out_hw_queue_id: The ID of the new HW queue + * @nesting_parent_iova: Base address of the queue memory in the guest physical + * address space + * @length: Length of the queue memory + * + * Allocate a HW queue object for a vIOMMU-specific HW-accelerated queue, which + * allows HW to access a guest queue memory described using @nesting_parent_iova + * and @length. + * + * A vIOMMU can allocate multiple queues, but it must use a different @index per + * type to separate each allocation, e.g:: + * + * Type1 HW queue0, Type1 HW queue1, Type2 HW queue0, ... + */ +struct iommu_hw_queue_alloc { + __u32 size; + __u32 flags; + __u32 viommu_id; + __u32 type; + __u32 index; + __u32 out_hw_queue_id; + __aligned_u64 nesting_parent_iova; + __aligned_u64 length; +}; +#define IOMMU_HW_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HW_QUEUE_ALLOC) #endif -- cgit v1.2.3 From 0b37d892d0425811618a737037b0212884cc25ae Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:07 -0700 Subject: iommufd/driver: Add iommufd_hw_queue_depend/undepend() helpers NVIDIA Virtual Command Queue is one of the iommufd users exposing vIOMMU features to user space VMs. Its hardware has a strict rule when mapping and unmapping multiple global CMDQVs to/from a VM-owned VINTF, requiring mappings in ascending order and unmappings in descending order. The tegra241-cmdqv driver can apply the rule for a mapping in the LVCMDQ allocation handler. However, it can't do the same for an unmapping since user space could start random destroy calls breaking the rule, while the destroy op in the driver level can't reject a destroy call as it returns void. Add iommufd_hw_queue_depend/undepend for-driver helpers, allowing LVCMDQ allocator to refcount_inc() a sibling LVCMDQ object and LVCMDQ destroyer to refcount_dec(), so that iommufd core will help block a random destroy call that breaks the rule. This is a bit of compromise, because a driver might end up with abusing the API that deadlocks the objects. So restrict the API to a dependency between two driver-allocated objects of the same type, as iommufd would unlikely build any core-level dependency in this case. And encourage to use the macro version that currently supports the HW QUEUE objects only. Link: https://patch.msgid.link/r/2735c32e759c82f2e6c87cb32134eaf09b7589b5.1752126748.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/driver.c | 28 +++++++++++++++++++++++++++ include/linux/iommufd.h | 44 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index 887719016804..e578ef32d30c 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -3,6 +3,34 @@ */ #include "iommufd_private.h" +/* Driver should use a per-structure helper in include/linux/iommufd.h */ +int _iommufd_object_depend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended) +{ + /* Reject self dependency that dead locks */ + if (obj_dependent == obj_depended) + return -EINVAL; + /* Only support dependency between two objects of the same type */ + if (obj_dependent->type != obj_depended->type) + return -EINVAL; + + refcount_inc(&obj_depended->users); + return 0; +} +EXPORT_SYMBOL_NS_GPL(_iommufd_object_depend, "IOMMUFD"); + +/* Driver should use a per-structure helper in include/linux/iommufd.h */ +void _iommufd_object_undepend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended) +{ + if (WARN_ON_ONCE(obj_dependent == obj_depended || + obj_dependent->type != obj_depended->type)) + return; + + refcount_dec(&obj_depended->users); +} +EXPORT_SYMBOL_NS_GPL(_iommufd_object_undepend, "IOMMUFD"); + /* Caller should xa_lock(&viommu->vdevs) to protect the return value */ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index ce4011a2fc27..fa23439fa483 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -251,6 +251,10 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) #endif /* CONFIG_IOMMUFD */ #if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) +int _iommufd_object_depend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended); +void _iommufd_object_undepend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, @@ -259,6 +263,18 @@ int iommufd_viommu_report_event(struct iommufd_viommu *viommu, enum iommu_veventq_type type, void *event_data, size_t data_len); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ +static inline int _iommufd_object_depend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended) +{ + return -EOPNOTSUPP; +} + +static inline void +_iommufd_object_undepend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended) +{ +} + static inline struct device * iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { @@ -298,4 +314,32 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_hw_queue, \ ((drv_struct *)NULL)->member))) +/* + * Helpers for IOMMU driver to build/destroy a dependency between two sibling + * structures created by one of the allocators above + */ +#define iommufd_hw_queue_depend(dependent, depended, member) \ + ({ \ + int ret = -EINVAL; \ + \ + static_assert(__same_type(struct iommufd_hw_queue, \ + dependent->member)); \ + static_assert(__same_type(typeof(*dependent), *depended)); \ + if (!WARN_ON_ONCE(dependent->member.viommu != \ + depended->member.viommu)) \ + ret = _iommufd_object_depend(&dependent->member.obj, \ + &depended->member.obj); \ + ret; \ + }) + +#define iommufd_hw_queue_undepend(dependent, depended, member) \ + ({ \ + static_assert(__same_type(struct iommufd_hw_queue, \ + dependent->member)); \ + static_assert(__same_type(typeof(*dependent), *depended)); \ + WARN_ON_ONCE(dependent->member.viommu != \ + depended->member.viommu); \ + _iommufd_object_undepend(&dependent->member.obj, \ + &depended->member.obj); \ + }) #endif -- cgit v1.2.3 From 56e9a0d8e53f56f313d332888a32a44a71f3a9ab Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:09 -0700 Subject: iommufd: Add mmap interface For vIOMMU passing through HW resources to user space (VMs), allowing a VM to control the passed through HW directly by accessing hardware registers, add an mmap infrastructure to map the physical MMIO pages to user space. Maintain a maple tree per ictx as a translation table managing mmappable regions, from an allocated for-user mmap offset to an iommufd_mmap struct, where it stores the real physical address range for io_remap_pfn_range(). Keep track of the lifecycle of the mmappable region by taking refcount of its owner, so as to enforce user space to unmap the region first before it can destroy its owner object. To allow an IOMMU driver to add and delete mmappable regions onto/from the maple tree, add iommufd_viommu_alloc/destroy_mmap helpers. Link: https://patch.msgid.link/r/9a888a326b12aa5fe940083eae1156304e210fe0.1752126748.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Pranjal Shrivastava Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/driver.c | 52 +++++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 14 ++++++++ drivers/iommu/iommufd/main.c | 63 +++++++++++++++++++++++++++++++++ include/linux/iommufd.h | 42 ++++++++++++++++++++++ 4 files changed, 171 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index e578ef32d30c..e4eae20bcd4e 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -31,6 +31,58 @@ void _iommufd_object_undepend(struct iommufd_object *obj_dependent, } EXPORT_SYMBOL_NS_GPL(_iommufd_object_undepend, "IOMMUFD"); +/* + * Allocate an @offset to return to user space to use for an mmap() syscall + * + * Driver should use a per-structure helper in include/linux/iommufd.h + */ +int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner, + phys_addr_t mmio_addr, size_t length, + unsigned long *offset) +{ + struct iommufd_mmap *immap; + unsigned long startp; + int rc; + + if (!PAGE_ALIGNED(mmio_addr)) + return -EINVAL; + if (!length || !PAGE_ALIGNED(length)) + return -EINVAL; + + immap = kzalloc(sizeof(*immap), GFP_KERNEL); + if (!immap) + return -ENOMEM; + immap->owner = owner; + immap->length = length; + immap->mmio_addr = mmio_addr; + + /* Skip the first page to ease caller identifying the returned offset */ + rc = mtree_alloc_range(&ictx->mt_mmap, &startp, immap, immap->length, + PAGE_SIZE, ULONG_MAX, GFP_KERNEL); + if (rc < 0) { + kfree(immap); + return rc; + } + + /* mmap() syscall will right-shift the offset in vma->vm_pgoff too */ + immap->vm_pgoff = startp >> PAGE_SHIFT; + *offset = startp; + return 0; +} +EXPORT_SYMBOL_NS_GPL(_iommufd_alloc_mmap, "IOMMUFD"); + +/* Driver should use a per-structure helper in include/linux/iommufd.h */ +void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, unsigned long offset) +{ + struct iommufd_mmap *immap; + + immap = mtree_erase(&ictx->mt_mmap, offset); + WARN_ON_ONCE(!immap || immap->owner != owner); + kfree(immap); +} +EXPORT_SYMBOL_NS_GPL(_iommufd_destroy_mmap, "IOMMUFD"); + /* Caller should xa_lock(&viommu->vdevs) to protect the return value */ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index dcd609573244..cd14163abdd1 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,7 @@ struct iommufd_ctx { struct xarray groups; wait_queue_head_t destroy_wait; struct rw_semaphore ioas_creation_lock; + struct maple_tree mt_mmap; struct mutex sw_msi_lock; struct list_head sw_msi_list; @@ -55,6 +57,18 @@ struct iommufd_ctx { struct iommufd_ioas *vfio_ioas; }; +/* Entry for iommufd_ctx::mt_mmap */ +struct iommufd_mmap { + struct iommufd_object *owner; + + /* Page-shifted start position in mt_mmap to validate vma->vm_pgoff */ + unsigned long vm_pgoff; + + /* Physical range for io_remap_pfn_range() */ + phys_addr_t mmio_addr; + size_t length; +}; + /* * The IOVA to PFN map. The map automatically copies the PFNs into multiple * domains and permits sharing of PFNs between io_pagetable instances. This diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 4e8dbbfac890..0fb81a905cb1 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -275,6 +275,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); xa_init(&ictx->groups); ictx->file = filp; + mt_init_flags(&ictx->mt_mmap, MT_FLAGS_ALLOC_RANGE); init_waitqueue_head(&ictx->destroy_wait); mutex_init(&ictx->sw_msi_lock); INIT_LIST_HEAD(&ictx->sw_msi_list); @@ -479,11 +480,73 @@ static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd, return ret; } +static void iommufd_fops_vma_open(struct vm_area_struct *vma) +{ + struct iommufd_mmap *immap = vma->vm_private_data; + + refcount_inc(&immap->owner->users); +} + +static void iommufd_fops_vma_close(struct vm_area_struct *vma) +{ + struct iommufd_mmap *immap = vma->vm_private_data; + + refcount_dec(&immap->owner->users); +} + +static const struct vm_operations_struct iommufd_vma_ops = { + .open = iommufd_fops_vma_open, + .close = iommufd_fops_vma_close, +}; + +/* The vm_pgoff must be pre-allocated from mt_mmap, and given to user space */ +static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct iommufd_ctx *ictx = filp->private_data; + size_t length = vma->vm_end - vma->vm_start; + struct iommufd_mmap *immap; + int rc; + + if (!PAGE_ALIGNED(length)) + return -EINVAL; + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + if (vma->vm_flags & VM_EXEC) + return -EPERM; + + /* vma->vm_pgoff carries a page-shifted start position to an immap */ + immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff << PAGE_SHIFT); + if (!immap) + return -ENXIO; + /* + * mtree_load() returns the immap for any contained mmio_addr, so only + * allow the exact immap thing to be mapped + */ + if (vma->vm_pgoff != immap->vm_pgoff || length != immap->length) + return -ENXIO; + + vma->vm_pgoff = 0; + vma->vm_private_data = immap; + vma->vm_ops = &iommufd_vma_ops; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + rc = io_remap_pfn_range(vma, vma->vm_start, + immap->mmio_addr >> PAGE_SHIFT, length, + vma->vm_page_prot); + if (rc) + return rc; + + /* vm_ops.open won't be called for mmap itself. */ + refcount_inc(&immap->owner->users); + return rc; +} + static const struct file_operations iommufd_fops = { .owner = THIS_MODULE, .open = iommufd_fops_open, .release = iommufd_fops_release, .unlocked_ioctl = iommufd_fops_ioctl, + .mmap = iommufd_fops_mmap, }; /** diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index fa23439fa483..e3a0cd47384d 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -255,6 +255,11 @@ int _iommufd_object_depend(struct iommufd_object *obj_dependent, struct iommufd_object *obj_depended); void _iommufd_object_undepend(struct iommufd_object *obj_dependent, struct iommufd_object *obj_depended); +int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner, + phys_addr_t mmio_addr, size_t length, + unsigned long *offset); +void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, unsigned long offset); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, @@ -275,6 +280,20 @@ _iommufd_object_undepend(struct iommufd_object *obj_dependent, { } +static inline int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, + phys_addr_t mmio_addr, size_t length, + unsigned long *offset) +{ + return -EOPNOTSUPP; +} + +static inline void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, + unsigned long offset) +{ +} + static inline struct device * iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { @@ -342,4 +361,27 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, _iommufd_object_undepend(&dependent->member.obj, \ &depended->member.obj); \ }) + +/* + * Helpers for IOMMU driver to alloc/destroy an mmapable area for a structure. + * + * To support an mmappable MMIO region, kernel driver must first register it to + * iommufd core to allocate an @offset, during a driver-structure initialization + * (e.g. viommu_init op). Then, it should report to user space this @offset and + * the @length of the MMIO region for mmap syscall. + */ +static inline int iommufd_viommu_alloc_mmap(struct iommufd_viommu *viommu, + phys_addr_t mmio_addr, + size_t length, + unsigned long *offset) +{ + return _iommufd_alloc_mmap(viommu->ictx, &viommu->obj, mmio_addr, + length, offset); +} + +static inline void iommufd_viommu_destroy_mmap(struct iommufd_viommu *viommu, + unsigned long offset) +{ + _iommufd_destroy_mmap(viommu->ictx, &viommu->obj, offset); +} #endif -- cgit v1.2.3 From 62622a8753fa6af3c104f9552863e6473b92fb31 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 9 Jul 2025 22:59:12 -0700 Subject: iommu: Allow an input type in hw_info op The hw_info uAPI will support a bidirectional data_type field that can be used as an input field for user space to request for a specific info data. To prepare for the uAPI update, change the iommu layer first: - Add a new IOMMU_HW_INFO_TYPE_DEFAULT as an input, for which driver can output its only (or firstly) supported type - Update the kdoc accordingly - Roll out the type validation in the existing drivers Link: https://patch.msgid.link/r/00f4a2d3d930721f61367014717b3ba2d1e82a81.1752126748.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 4 ++++ drivers/iommu/intel/iommu.c | 4 ++++ drivers/iommu/iommufd/device.c | 3 +++ drivers/iommu/iommufd/selftest.c | 4 ++++ include/linux/iommu.h | 3 ++- include/uapi/linux/iommufd.h | 4 +++- 6 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 170d69162848..eb9fe1f6311a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -15,6 +15,10 @@ void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 __iomem *base_idr; unsigned int i; + if (*type != IOMMU_HW_INFO_TYPE_DEFAULT && + *type != IOMMU_HW_INFO_TYPE_ARM_SMMUV3) + return ERR_PTR(-EOPNOTSUPP); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return ERR_PTR(-ENOMEM); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 850f1a6f548c..5f75faffca15 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4098,6 +4098,10 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, struct intel_iommu *iommu = info->iommu; struct iommu_hw_info_vtd *vtd; + if (*type != IOMMU_HW_INFO_TYPE_DEFAULT && + *type != IOMMU_HW_INFO_TYPE_INTEL_VTD) + return ERR_PTR(-EOPNOTSUPP); + vtd = kzalloc(sizeof(*vtd), GFP_KERNEL); if (!vtd) return ERR_PTR(-ENOMEM); diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 0567faff5680..14955dc43892 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -1512,6 +1512,9 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) cmd->__reserved[2]) return -EOPNOTSUPP; + /* Clear the type field since drivers don't support a random input */ + cmd->out_data_type = IOMMU_HW_INFO_TYPE_DEFAULT; + idev = iommufd_get_device(ucmd, cmd->dev_id); if (IS_ERR(idev)) return PTR_ERR(idev); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 8b2c44b32530..a5dc36219a90 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -310,6 +310,10 @@ static void *mock_domain_hw_info(struct device *dev, u32 *length, { struct iommu_test_hw_info *info; + if (*type != IOMMU_HW_INFO_TYPE_DEFAULT && + *type != IOMMU_HW_INFO_TYPE_SELFTEST) + return ERR_PTR(-EOPNOTSUPP); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return ERR_PTR(-ENOMEM); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e06a0fbe4bc7..e8b59ef54e48 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -603,7 +603,8 @@ __iommu_copy_struct_to_user(const struct iommu_user_data *dst_data, * @capable: check capability * @hw_info: report iommu hardware information. The data buffer returned by this * op is allocated in the iommu driver and freed by the caller after - * use. + * use. @type can input a requested type and output a supported type. + * Driver should reject an unsupported data @type input * @domain_alloc: Do not use in new drivers * @domain_alloc_identity: allocate an IDENTITY domain. Drivers should prefer to * use identity_domain instead. This should only be used diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index b928c1ed2395..9c8c304b5de2 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -593,13 +593,15 @@ struct iommu_hw_info_arm_smmuv3 { /** * enum iommu_hw_info_type - IOMMU Hardware Info Types - * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware + * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware * info + * @IOMMU_HW_INFO_TYPE_DEFAULT: Input to request for a default type * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type */ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_NONE = 0, + IOMMU_HW_INFO_TYPE_DEFAULT = 0, IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2, }; -- cgit v1.2.3 From 850f14f5b91986e586b66565c9c75bdd4c834571 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 16 Jul 2025 15:03:45 +0800 Subject: iommufd: Destroy vdevice on idevice destroy Destroy iommufd_vdevice (vdev) on iommufd_idevice (idev) destruction so that vdev can't outlive idev. idev represents the physical device bound to iommufd, while the vdev represents the virtual instance of the physical device in the VM. The lifecycle of the vdev should not be longer than idev. This doesn't cause real problem on existing use cases cause vdev doesn't impact the physical device, only provides virtualization information. But to extend vdev for Confidential Computing (CC), there are needs to do secure configuration for the vdev, e.g. TSM Bind/Unbind. These configurations should be rolled back on idev destroy, or the external driver (VFIO) functionality may be impact. The idev is created by external driver so its destruction can't fail. The idev implements pre_destroy() op to actively remove its associated vdev before destroying itself. There are 3 cases on idev pre_destroy(): 1. vdev is already destroyed by userspace. No extra handling needed. 2. vdev is still alive. Use iommufd_object_tombstone_user() to destroy vdev and tombstone the vdev ID. 3. vdev is being destroyed by userspace. The vdev ID is already freed, but vdev destroy handler is not completed. This requires multi-threads syncing - vdev holds idev's short term users reference until vdev destruction completes, idev leverages existing wait_shortterm mechanism for syncing. idev should also block any new reference to it after pre_destroy(), or the following wait shortterm would timeout. Introduce a 'destroying' flag, set it to true on idev pre_destroy(). Any attempt to reference idev should honor this flag under the protection of idev->igroup->lock. Link: https://patch.msgid.link/r/20250716070349.1807226-5-yilun.xu@linux.intel.com Originally-by: Nicolin Chen Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Co-developed-by: "Aneesh Kumar K.V (Arm)" Signed-off-by: "Aneesh Kumar K.V (Arm)" Tested-by: Nicolin Chen Signed-off-by: Xu Yilun Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 51 ++++++++++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 12 ++++++++ drivers/iommu/iommufd/main.c | 2 ++ drivers/iommu/iommufd/viommu.c | 52 ++++++++++++++++++++++++++++++--- include/linux/iommufd.h | 1 + include/uapi/linux/iommufd.h | 5 ++++ 6 files changed, 119 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index e2ba21c43ad2..ee6ff4caf398 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -137,6 +137,57 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, } } +static void iommufd_device_remove_vdev(struct iommufd_device *idev) +{ + struct iommufd_vdevice *vdev; + + mutex_lock(&idev->igroup->lock); + /* prevent new references from vdev */ + idev->destroying = true; + /* vdev has been completely destroyed by userspace */ + if (!idev->vdev) + goto out_unlock; + + vdev = iommufd_get_vdevice(idev->ictx, idev->vdev->obj.id); + /* + * An ongoing vdev destroy ioctl has removed the vdev from the object + * xarray, but has not finished iommufd_vdevice_destroy() yet as it + * needs the same mutex. We exit the locking then wait on short term + * users for the vdev destruction. + */ + if (IS_ERR(vdev)) + goto out_unlock; + + /* Should never happen */ + if (WARN_ON(vdev != idev->vdev)) { + iommufd_put_object(idev->ictx, &vdev->obj); + goto out_unlock; + } + + /* + * vdev is still alive. Hold a users refcount to prevent racing with + * userspace destruction, then use iommufd_object_tombstone_user() to + * destroy it and leave a tombstone. + */ + refcount_inc(&vdev->obj.users); + iommufd_put_object(idev->ictx, &vdev->obj); + mutex_unlock(&idev->igroup->lock); + iommufd_object_tombstone_user(idev->ictx, &vdev->obj); + return; + +out_unlock: + mutex_unlock(&idev->igroup->lock); +} + +void iommufd_device_pre_destroy(struct iommufd_object *obj) +{ + struct iommufd_device *idev = + container_of(obj, struct iommufd_device, obj); + + /* Release the short term users on this */ + iommufd_device_remove_vdev(idev); +} + void iommufd_device_destroy(struct iommufd_object *obj) { struct iommufd_device *idev = diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 149545060029..5d6ea5395cfe 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -489,6 +489,8 @@ struct iommufd_device { /* always the physical device */ struct device *dev; bool enforce_cache_coherency; + struct iommufd_vdevice *vdev; + bool destroying; }; static inline struct iommufd_device * @@ -499,6 +501,7 @@ iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id) struct iommufd_device, obj); } +void iommufd_device_pre_destroy(struct iommufd_object *obj); void iommufd_device_destroy(struct iommufd_object *obj); int iommufd_get_hw_info(struct iommufd_ucmd *ucmd); @@ -687,9 +690,18 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_viommu_destroy(struct iommufd_object *obj); int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_vdevice_destroy(struct iommufd_object *obj); +void iommufd_vdevice_abort(struct iommufd_object *obj); int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_hw_queue_destroy(struct iommufd_object *obj); +static inline struct iommufd_vdevice * +iommufd_get_vdevice(struct iommufd_ctx *ictx, u32 id) +{ + return container_of(iommufd_get_object(ictx, id, + IOMMUFD_OBJ_VDEVICE), + struct iommufd_vdevice, obj); +} + #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); void iommufd_selftest_destroy(struct iommufd_object *obj); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 53085d24ce4a..99c1aab3d396 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -655,6 +655,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { .destroy = iommufd_access_destroy_object, }, [IOMMUFD_OBJ_DEVICE] = { + .pre_destroy = iommufd_device_pre_destroy, .destroy = iommufd_device_destroy, }, [IOMMUFD_OBJ_FAULT] = { @@ -676,6 +677,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { }, [IOMMUFD_OBJ_VDEVICE] = { .destroy = iommufd_vdevice_destroy, + .abort = iommufd_vdevice_abort, }, [IOMMUFD_OBJ_VEVENTQ] = { .destroy = iommufd_veventq_destroy, diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index dcf8a85b9f6e..ecbae5091ffe 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -110,20 +110,37 @@ out_put_idev: return rc; } -void iommufd_vdevice_destroy(struct iommufd_object *obj) +void iommufd_vdevice_abort(struct iommufd_object *obj) { struct iommufd_vdevice *vdev = container_of(obj, struct iommufd_vdevice, obj); struct iommufd_viommu *viommu = vdev->viommu; + struct iommufd_device *idev = vdev->idev; + + lockdep_assert_held(&idev->igroup->lock); if (vdev->destroy) vdev->destroy(vdev); /* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */ xa_cmpxchg(&viommu->vdevs, vdev->virt_id, vdev, NULL, GFP_KERNEL); refcount_dec(&viommu->obj.users); + idev->vdev = NULL; put_device(vdev->dev); } +void iommufd_vdevice_destroy(struct iommufd_object *obj) +{ + struct iommufd_vdevice *vdev = + container_of(obj, struct iommufd_vdevice, obj); + struct iommufd_device *idev = vdev->idev; + struct iommufd_ctx *ictx = idev->ictx; + + mutex_lock(&idev->igroup->lock); + iommufd_vdevice_abort(obj); + mutex_unlock(&idev->igroup->lock); + iommufd_put_object(ictx, &idev->obj); +} + int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) { struct iommu_vdevice_alloc *cmd = ucmd->cmd; @@ -153,6 +170,17 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_put_idev; } + mutex_lock(&idev->igroup->lock); + if (idev->destroying) { + rc = -ENOENT; + goto out_unlock_igroup; + } + + if (idev->vdev) { + rc = -EEXIST; + goto out_unlock_igroup; + } + if (viommu->ops && viommu->ops->vdevice_size) { /* * It is a driver bug for: @@ -171,7 +199,7 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) ucmd->ictx, vdev_size, IOMMUFD_OBJ_VDEVICE); if (IS_ERR(vdev)) { rc = PTR_ERR(vdev); - goto out_put_idev; + goto out_unlock_igroup; } vdev->virt_id = virt_id; @@ -179,6 +207,19 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) get_device(idev->dev); vdev->viommu = viommu; refcount_inc(&viommu->obj.users); + /* + * A short term users reference is held on the idev so long as we have + * the pointer. iommufd_device_pre_destroy() will revoke it before the + * idev real destruction. + */ + vdev->idev = idev; + + /* + * iommufd_device_destroy() delays until idev->vdev is NULL before + * freeing the idev, which only happens once the vdev is finished + * destruction. + */ + idev->vdev = vdev; curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL); if (curr) { @@ -197,12 +238,15 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) if (rc) goto out_abort; iommufd_object_finalize(ucmd->ictx, &vdev->obj); - goto out_put_idev; + goto out_unlock_igroup; out_abort: iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj); +out_unlock_igroup: + mutex_unlock(&idev->igroup->lock); out_put_idev: - iommufd_put_object(ucmd->ictx, &idev->obj); + if (rc) + iommufd_put_object(ucmd->ictx, &idev->obj); out_put_viommu: iommufd_put_object(ucmd->ictx, &viommu->obj); return rc; diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index e3a0cd47384d..b88911026bc4 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -108,6 +108,7 @@ struct iommufd_viommu { struct iommufd_vdevice { struct iommufd_object obj; struct iommufd_viommu *viommu; + struct iommufd_device *idev; struct device *dev; /* diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 554aacf89ea7..c218c89e0e2e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -1070,6 +1070,11 @@ struct iommu_viommu_alloc { * * Allocate a virtual device instance (for a physical device) against a vIOMMU. * This instance holds the device's information (related to its vIOMMU) in a VM. + * User should use IOMMU_DESTROY to destroy the virtual device before + * destroying the physical device (by closing vfio_cdev fd). Otherwise the + * virtual device would be forcibly destroyed on physical device destruction, + * its vdevice_id would be permanently leaked (unremovable & unreusable) until + * iommu fd closed. */ struct iommu_vdevice_alloc { __u32 size; -- cgit v1.2.3 From 651f733675c4a26e59dd34522917eace20c557c0 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 16 Jul 2025 15:03:46 +0800 Subject: iommufd/vdevice: Remove struct device reference from struct vdevice Remove struct device *dev from struct vdevice. The dev pointer is the Plan B for vdevice to reference the physical device. As now vdev->idev is added without refcounting concern, just use vdev->idev->dev when needed. To avoid exposing struct iommufd_device in the public header, export a iommufd_vdevice_to_device() helper. Link: https://patch.msgid.link/r/20250716070349.1807226-6-yilun.xu@linux.intel.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Co-developed-by: Nicolin Chen Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Xu Yilun Signed-off-by: Jason Gunthorpe --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 3 ++- drivers/iommu/iommufd/driver.c | 10 ++++++++-- drivers/iommu/iommufd/viommu.c | 3 --- include/linux/iommufd.h | 8 +++++++- 4 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index eb90af5093d8..4c86eacd36b1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -1218,7 +1218,8 @@ static void tegra241_vintf_destroy_vsid(struct iommufd_vdevice *vdev) static int tegra241_vintf_init_vsid(struct iommufd_vdevice *vdev) { - struct arm_smmu_master *master = dev_iommu_priv_get(vdev->dev); + struct device *dev = iommufd_vdevice_to_device(vdev); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct tegra241_vintf *vintf = viommu_to_vintf(vdev->viommu); struct tegra241_vintf_sid *vsid = vdev_to_vsid(vdev); struct arm_smmu_stream *stream = &master->streams[0]; diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index e4eae20bcd4e..6f1010da221c 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -83,6 +83,12 @@ void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, } EXPORT_SYMBOL_NS_GPL(_iommufd_destroy_mmap, "IOMMUFD"); +struct device *iommufd_vdevice_to_device(struct iommufd_vdevice *vdev) +{ + return vdev->idev->dev; +} +EXPORT_SYMBOL_NS_GPL(iommufd_vdevice_to_device, "IOMMUFD"); + /* Caller should xa_lock(&viommu->vdevs) to protect the return value */ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) @@ -92,7 +98,7 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, lockdep_assert_held(&viommu->vdevs.xa_lock); vdev = xa_load(&viommu->vdevs, vdev_id); - return vdev ? vdev->dev : NULL; + return vdev ? iommufd_vdevice_to_device(vdev) : NULL; } EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD"); @@ -109,7 +115,7 @@ int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, xa_lock(&viommu->vdevs); xa_for_each(&viommu->vdevs, index, vdev) { - if (vdev->dev == dev) { + if (iommufd_vdevice_to_device(vdev) == dev) { *vdev_id = vdev->virt_id; rc = 0; break; diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index ecbae5091ffe..6cf0bd5d8f08 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -125,7 +125,6 @@ void iommufd_vdevice_abort(struct iommufd_object *obj) xa_cmpxchg(&viommu->vdevs, vdev->virt_id, vdev, NULL, GFP_KERNEL); refcount_dec(&viommu->obj.users); idev->vdev = NULL; - put_device(vdev->dev); } void iommufd_vdevice_destroy(struct iommufd_object *obj) @@ -203,8 +202,6 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) } vdev->virt_id = virt_id; - vdev->dev = idev->dev; - get_device(idev->dev); vdev->viommu = viommu; refcount_inc(&viommu->obj.users); /* diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index b88911026bc4..810e4d8ac912 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -109,7 +109,6 @@ struct iommufd_vdevice { struct iommufd_object obj; struct iommufd_viommu *viommu; struct iommufd_device *idev; - struct device *dev; /* * Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID of @@ -261,6 +260,7 @@ int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner, unsigned long *offset); void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner, unsigned long offset); +struct device *iommufd_vdevice_to_device(struct iommufd_vdevice *vdev); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, @@ -295,6 +295,12 @@ static inline void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, { } +static inline struct device * +iommufd_vdevice_to_device(struct iommufd_vdevice *vdev) +{ + return NULL; +} + static inline struct device * iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { -- cgit v1.2.3 From ab6bc44159d8f0c4ee757e0ce041fa9033e0ead8 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 16 Jul 2025 15:03:49 +0800 Subject: iommufd: Rename some shortterm-related identifiers Rename the shortterm-related identifiers to wait-related. The usage of shortterm_users refcount is now beyond its name. It is also used for references which live longer than an ioctl execution. E.g. vdev holds idev's shortterm_users refcount on vdev allocation, releases it during idev's pre_destroy(). Rename the refcount as wait_cnt, since it is always used to sync the referencing & the destruction of the object by waiting for it to go to zero. List all changed identifiers: iommufd_object::shortterm_users -> iommufd_object::wait_cnt REMOVE_WAIT_SHORTTERM -> REMOVE_WAIT iommufd_object_dec_wait_shortterm() -> iommufd_object_dec_wait() zerod_shortterm -> zerod_wait_cnt No functional change intended. Link: https://patch.msgid.link/r/20250716070349.1807226-9-yilun.xu@linux.intel.com Suggested-by: Kevin Tian Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Xu Yilun Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 6 ++--- drivers/iommu/iommufd/iommufd_private.h | 18 +++++++-------- drivers/iommu/iommufd/main.c | 39 +++++++++++++++++---------------- drivers/iommu/iommufd/viommu.c | 4 ++-- include/linux/iommufd.h | 8 ++++++- 5 files changed, 41 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index ee6ff4caf398..65fbd098f9e9 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -152,8 +152,8 @@ static void iommufd_device_remove_vdev(struct iommufd_device *idev) /* * An ongoing vdev destroy ioctl has removed the vdev from the object * xarray, but has not finished iommufd_vdevice_destroy() yet as it - * needs the same mutex. We exit the locking then wait on short term - * users for the vdev destruction. + * needs the same mutex. We exit the locking then wait on wait_cnt + * reference for the vdev destruction. */ if (IS_ERR(vdev)) goto out_unlock; @@ -184,7 +184,7 @@ void iommufd_device_pre_destroy(struct iommufd_object *obj) struct iommufd_device *idev = container_of(obj, struct iommufd_device, obj); - /* Release the short term users on this */ + /* Release the wait_cnt reference on this */ iommufd_device_remove_vdev(idev); } diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 5d6ea5395cfe..0da2a81eedfa 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -169,7 +169,7 @@ static inline bool iommufd_lock_obj(struct iommufd_object *obj) { if (!refcount_inc_not_zero(&obj->users)) return false; - if (!refcount_inc_not_zero(&obj->shortterm_users)) { + if (!refcount_inc_not_zero(&obj->wait_cnt)) { /* * If the caller doesn't already have a ref on obj this must be * called under the xa_lock. Otherwise the caller is holding a @@ -187,11 +187,11 @@ static inline void iommufd_put_object(struct iommufd_ctx *ictx, struct iommufd_object *obj) { /* - * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees - * a spurious !0 users with a 0 shortterm_users. + * Users first, then wait_cnt so that REMOVE_WAIT never sees a spurious + * !0 users with a 0 wait_cnt. */ refcount_dec(&obj->users); - if (refcount_dec_and_test(&obj->shortterm_users)) + if (refcount_dec_and_test(&obj->wait_cnt)) wake_up_interruptible_all(&ictx->destroy_wait); } @@ -202,7 +202,7 @@ void iommufd_object_finalize(struct iommufd_ctx *ictx, struct iommufd_object *obj); enum { - REMOVE_WAIT_SHORTTERM = BIT(0), + REMOVE_WAIT = BIT(0), REMOVE_OBJ_TOMBSTONE = BIT(1), }; int iommufd_object_remove(struct iommufd_ctx *ictx, @@ -211,15 +211,15 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, /* * The caller holds a users refcount and wants to destroy the object. At this - * point the caller has no shortterm_users reference and at least the xarray - * will be holding one. + * point the caller has no wait_cnt reference and at least the xarray will be + * holding one. */ static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx, struct iommufd_object *obj) { int ret; - ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM); + ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT); /* * If there is a bug and we couldn't destroy the object then we did put @@ -239,7 +239,7 @@ static inline void iommufd_object_tombstone_user(struct iommufd_ctx *ictx, int ret; ret = iommufd_object_remove(ictx, obj, obj->id, - REMOVE_WAIT_SHORTTERM | REMOVE_OBJ_TOMBSTONE); + REMOVE_WAIT | REMOVE_OBJ_TOMBSTONE); /* * If there is a bug and we couldn't destroy the object then we did put diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 99c1aab3d396..15af7ced0501 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -42,7 +42,7 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, return ERR_PTR(-ENOMEM); obj->type = type; /* Starts out bias'd by 1 until it is removed from the xarray */ - refcount_set(&obj->shortterm_users, 1); + refcount_set(&obj->wait_cnt, 1); refcount_set(&obj->users, 1); /* @@ -155,22 +155,22 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, return obj; } -static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx, - struct iommufd_object *to_destroy) +static int iommufd_object_dec_wait(struct iommufd_ctx *ictx, + struct iommufd_object *to_destroy) { - if (refcount_dec_and_test(&to_destroy->shortterm_users)) + if (refcount_dec_and_test(&to_destroy->wait_cnt)) return 0; if (iommufd_object_ops[to_destroy->type].pre_destroy) iommufd_object_ops[to_destroy->type].pre_destroy(to_destroy); if (wait_event_timeout(ictx->destroy_wait, - refcount_read(&to_destroy->shortterm_users) == 0, + refcount_read(&to_destroy->wait_cnt) == 0, msecs_to_jiffies(60000))) return 0; pr_crit("Time out waiting for iommufd object to become free\n"); - refcount_inc(&to_destroy->shortterm_users); + refcount_inc(&to_destroy->wait_cnt); return -EBUSY; } @@ -184,17 +184,18 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, { struct iommufd_object *obj; XA_STATE(xas, &ictx->objects, id); - bool zerod_shortterm = false; + bool zerod_wait_cnt = false; int ret; /* - * The purpose of the shortterm_users is to ensure deterministic - * destruction of objects used by external drivers and destroyed by this - * function. Any temporary increment of the refcount must increment - * shortterm_users, such as during ioctl execution. + * The purpose of the wait_cnt is to ensure deterministic destruction + * of objects used by external drivers and destroyed by this function. + * Incrementing this wait_cnt should either be short lived, such as + * during ioctl execution, or be revoked and blocked during + * pre_destroy(), such as vdev holding the idev's refcount. */ - if (flags & REMOVE_WAIT_SHORTTERM) { - ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy); + if (flags & REMOVE_WAIT) { + ret = iommufd_object_dec_wait(ictx, to_destroy); if (ret) { /* * We have a bug. Put back the callers reference and @@ -203,7 +204,7 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, refcount_dec(&to_destroy->users); return ret; } - zerod_shortterm = true; + zerod_wait_cnt = true; } xa_lock(&ictx->objects); @@ -235,11 +236,11 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, xa_unlock(&ictx->objects); /* - * Since users is zero any positive users_shortterm must be racing + * Since users is zero any positive wait_cnt must be racing * iommufd_put_object(), or we have a bug. */ - if (!zerod_shortterm) { - ret = iommufd_object_dec_wait_shortterm(ictx, obj); + if (!zerod_wait_cnt) { + ret = iommufd_object_dec_wait(ictx, obj); if (WARN_ON(ret)) return ret; } @@ -249,9 +250,9 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, return 0; err_xa: - if (zerod_shortterm) { + if (zerod_wait_cnt) { /* Restore the xarray owned reference */ - refcount_set(&obj->shortterm_users, 1); + refcount_set(&obj->wait_cnt, 1); } xa_unlock(&ictx->objects); diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 6cf0bd5d8f08..2ca5809b238b 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -205,8 +205,8 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) vdev->viommu = viommu; refcount_inc(&viommu->obj.users); /* - * A short term users reference is held on the idev so long as we have - * the pointer. iommufd_device_pre_destroy() will revoke it before the + * A wait_cnt reference is held on the idev so long as we have the + * pointer. iommufd_device_pre_destroy() will revoke it before the * idev real destruction. */ vdev->idev = idev; diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 810e4d8ac912..6e7efe83bc5d 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -46,7 +46,13 @@ enum iommufd_object_type { /* Base struct for all objects with a userspace ID handle. */ struct iommufd_object { - refcount_t shortterm_users; + /* + * Destroy will sleep and wait for wait_cnt to go to zero. This allows + * concurrent users of the ID to reliably avoid causing a spurious + * destroy failure. Incrementing this count should either be short + * lived or be revoked and blocked during pre_destroy(). + */ + refcount_t wait_cnt; refcount_t users; enum iommufd_object_type type; unsigned int id; -- cgit v1.2.3