summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-07-31 12:43:08 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-07-31 12:43:08 -0700
commitc93529ad4fa8d8d8cb21649e70a46991a1dda0f8 (patch)
tree6e8463ada36738a111d4978a746222575004f779 /include/linux
parent7ce4de1cdaf11c39b507008dfb5a4e59079d4e8a (diff)
parent2c78e74493d33b002312296fbab1d688bfd0f76f (diff)
Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd
Pull iommufd updates from Jason Gunthorpe: "This broadly brings the assigned HW command queue support to iommufd. This feature is used to improve SVA performance in VMs by avoiding paravirtualization traps during SVA invalidations. Along the way I think some of the core logic is in a much better state to support future driver backed features. Summary: - IOMMU HW now has features to directly assign HW command queues to a guest VM. In this mode the command queue operates on a limited set of invalidation commands that are suitable for improving guest invalidation performance and easy for the HW to virtualize. This brings the generic infrastructure to allow IOMMU drivers to expose such command queues through the iommufd uAPI, mmap the doorbell pages, and get the guest physical range for the command queue ring itself. - An implementation for the NVIDIA SMMUv3 extension "cmdqv" is built on the new iommufd command queue features. It works with the existing SMMU driver support for cmdqv in guest VMs. - Many precursor cleanups and improvements to support the above cleanly, changes to the general ioctl and object helpers, driver support for VDEVICE, and mmap pgoff cookie infrastructure. - Sequence VDEVICE destruction to always happen before VFIO device destruction. When using the above type features, and also in future confidential compute, the internal virtual device representation becomes linked to HW or CC TSM configuration and objects. If a VFIO device is removed from iommufd those HW objects should also be cleaned up to prevent a sort of UAF. This became important now that we have HW backing the VDEVICE. - Fix one syzkaller found error related to math overflows during iova allocation" * tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (57 commits) iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size iommu/arm-smmu-v3: Do not bother impl_ops if IOMMU_VIOMMU_TYPE_ARM_SMMUV3 iommufd: Rename some shortterm-related identifiers iommufd/selftest: Add coverage for vdevice tombstone iommufd/selftest: Explicitly skip tests for inapplicable variant iommufd/vdevice: Remove struct device reference from struct vdevice iommufd: Destroy vdevice on idevice destroy iommufd: Add a pre_destroy() op for objects iommufd: Add iommufd_object_tombstone_user() helper iommufd/viommu: Roll back to use iommufd_object_alloc() for vdevice iommufd/selftest: Test reserved regions near ULONG_MAX iommufd: Prevent ALIGN() overflow iommu/tegra241-cmdqv: import IOMMUFD module namespace iommufd: Do not allow _iommufd_object_alloc_ucmd if abort op is set iommu/tegra241-cmdqv: Add IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV support iommu/tegra241-cmdqv: Add user-space use support iommu/tegra241-cmdqv: Do not statically map LVCMDQs iommu/tegra241-cmdqv: Simplify deinit flow in tegra241_cmdqv_remove_vintf() iommu/tegra241-cmdqv: Use request_threaded_irq iommu/arm-smmu-v3-iommufd: Add hw_info to impl_ops ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/iommu.h74
-rw-r--r--include/linux/iommufd.h196
2 files changed, 234 insertions, 36 deletions
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7073be1d8841..c30d12e16473 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -14,6 +14,7 @@
#include <linux/err.h>
#include <linux/of.h>
#include <linux/iova_bitmap.h>
+#include <uapi/linux/iommufd.h>
#define IOMMU_READ (1 << 0)
#define IOMMU_WRITE (1 << 1)
@@ -558,12 +559,52 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
}
/**
+ * __iommu_copy_struct_to_user - Report iommu driver specific user space data
+ * @dst_data: Pointer to a struct iommu_user_data for user space data location
+ * @src_data: Pointer to an iommu driver specific user data that is defined in
+ * include/uapi/linux/iommufd.h
+ * @data_type: The data type of the @src_data. Must match with @dst_data.type
+ * @data_len: Length of current user data structure, i.e. sizeof(struct _src)
+ * @min_len: Initial length of user data structure for backward compatibility.
+ * This should be offsetofend using the last member in the user data
+ * struct that was initially added to include/uapi/linux/iommufd.h
+ */
+static inline int
+__iommu_copy_struct_to_user(const struct iommu_user_data *dst_data,
+ void *src_data, unsigned int data_type,
+ size_t data_len, size_t min_len)
+{
+ if (WARN_ON(!dst_data || !src_data))
+ return -EINVAL;
+ if (dst_data->type != data_type)
+ return -EINVAL;
+ if (dst_data->len < min_len || data_len < dst_data->len)
+ return -EINVAL;
+ return copy_struct_to_user(dst_data->uptr, dst_data->len, src_data,
+ data_len, NULL);
+}
+
+/**
+ * iommu_copy_struct_to_user - Report iommu driver specific user space data
+ * @user_data: Pointer to a struct iommu_user_data for user space data location
+ * @ksrc: Pointer to an iommu driver specific user data that is defined in
+ * include/uapi/linux/iommufd.h
+ * @data_type: The data type of the @ksrc. Must match with @user_data->type
+ * @min_last: The last member of the data structure @ksrc points in the initial
+ * version.
+ * Return 0 for success, otherwise -error.
+ */
+#define iommu_copy_struct_to_user(user_data, ksrc, data_type, min_last) \
+ __iommu_copy_struct_to_user(user_data, ksrc, data_type, sizeof(*ksrc), \
+ offsetofend(typeof(*ksrc), min_last))
+
+/**
* struct iommu_ops - iommu ops and capabilities
* @capable: check capability
* @hw_info: report iommu hardware information. The data buffer returned by this
* op is allocated in the iommu driver and freed by the caller after
- * use. The information type is one of enum iommu_hw_info_type defined
- * in include/uapi/linux/iommufd.h.
+ * use. @type can input a requested type and output a supported type.
+ * Driver should reject an unsupported data @type input
* @domain_alloc: Do not use in new drivers
* @domain_alloc_identity: allocate an IDENTITY domain. Drivers should prefer to
* use identity_domain instead. This should only be used
@@ -596,14 +637,16 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
* - IOMMU_DOMAIN_DMA: must use a dma domain
* - 0: use the default setting
* @default_domain_ops: the default ops for domains
- * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind
- * the @dev, as the set of virtualization resources shared/passed
- * to user space IOMMU instance. And associate it with a nesting
- * @parent_domain. The @viommu_type must be defined in the header
- * include/uapi/linux/iommufd.h
- * It is required to call iommufd_viommu_alloc() helper for
- * a bundled allocation of the core and the driver structures,
- * using the given @ictx pointer.
+ * @get_viommu_size: Get the size of a driver-level vIOMMU structure for a given
+ * @dev corresponding to @viommu_type. Driver should return 0
+ * if vIOMMU isn't supported accordingly. It is required for
+ * driver to use the VIOMMU_STRUCT_SIZE macro to sanitize the
+ * driver-level vIOMMU structure related to the core one
+ * @viommu_init: Init the driver-level struct of an iommufd_viommu on a physical
+ * IOMMU instance @viommu->iommu_dev, as the set of virtualization
+ * resources shared/passed to user space IOMMU instance. Associate
+ * it with a nesting @parent_domain. It is required for driver to
+ * set @viommu->ops pointing to its own viommu_ops
* @owner: Driver module providing these ops
* @identity_domain: An always available, always attachable identity
* translation.
@@ -619,7 +662,8 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
*/
struct iommu_ops {
bool (*capable)(struct device *dev, enum iommu_cap);
- void *(*hw_info)(struct device *dev, u32 *length, u32 *type);
+ void *(*hw_info)(struct device *dev, u32 *length,
+ enum iommu_hw_info_type *type);
/* Domain allocation and freeing by the iommu driver */
#if IS_ENABLED(CONFIG_FSL_PAMU)
@@ -653,9 +697,11 @@ struct iommu_ops {
int (*def_domain_type)(struct device *dev);
- struct iommufd_viommu *(*viommu_alloc)(
- struct device *dev, struct iommu_domain *parent_domain,
- struct iommufd_ctx *ictx, unsigned int viommu_type);
+ size_t (*get_viommu_size)(struct device *dev,
+ enum iommu_viommu_type viommu_type);
+ int (*viommu_init)(struct iommufd_viommu *viommu,
+ struct iommu_domain *parent_domain,
+ const struct iommu_user_data *user_data);
const struct iommu_domain_ops *default_domain_ops;
struct module *owner;
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 34b6e6ca4bfa..6e7efe83bc5d 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -37,6 +37,7 @@ enum iommufd_object_type {
IOMMUFD_OBJ_VIOMMU,
IOMMUFD_OBJ_VDEVICE,
IOMMUFD_OBJ_VEVENTQ,
+ IOMMUFD_OBJ_HW_QUEUE,
#ifdef CONFIG_IOMMUFD_TEST
IOMMUFD_OBJ_SELFTEST,
#endif
@@ -45,7 +46,13 @@ enum iommufd_object_type {
/* Base struct for all objects with a userspace ID handle. */
struct iommufd_object {
- refcount_t shortterm_users;
+ /*
+ * Destroy will sleep and wait for wait_cnt to go to zero. This allows
+ * concurrent users of the ID to reliably avoid causing a spurious
+ * destroy failure. Incrementing this count should either be short
+ * lived or be revoked and blocked during pre_destroy().
+ */
+ refcount_t wait_cnt;
refcount_t users;
enum iommufd_object_type type;
unsigned int id;
@@ -101,7 +108,36 @@ struct iommufd_viommu {
struct list_head veventqs;
struct rw_semaphore veventqs_rwsem;
- unsigned int type;
+ enum iommu_viommu_type type;
+};
+
+struct iommufd_vdevice {
+ struct iommufd_object obj;
+ struct iommufd_viommu *viommu;
+ struct iommufd_device *idev;
+
+ /*
+ * Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID of
+ * AMD IOMMU, and vRID of Intel VT-d
+ */
+ u64 virt_id;
+
+ /* Clean up all driver-specific parts of an iommufd_vdevice */
+ void (*destroy)(struct iommufd_vdevice *vdev);
+};
+
+struct iommufd_hw_queue {
+ struct iommufd_object obj;
+ struct iommufd_viommu *viommu;
+ struct iommufd_access *access;
+
+ u64 base_addr; /* in guest physical address space */
+ size_t length;
+
+ enum iommu_hw_queue_type type;
+
+ /* Clean up all driver-specific parts of an iommufd_hw_queue */
+ void (*destroy)(struct iommufd_hw_queue *hw_queue);
};
/**
@@ -120,6 +156,30 @@ struct iommufd_viommu {
* array->entry_num to report the number of handled requests.
* The data structure of the array entry must be defined in
* include/uapi/linux/iommufd.h
+ * @vdevice_size: Size of the driver-defined vDEVICE structure per this vIOMMU
+ * @vdevice_init: Initialize the driver-level structure of a vDEVICE object, or
+ * related HW procedure. @vdev is already initialized by iommufd
+ * core: vdev->dev and vdev->viommu pointers; vdev->id carries a
+ * per-vIOMMU virtual ID (refer to struct iommu_vdevice_alloc in
+ * include/uapi/linux/iommufd.h)
+ * If driver has a deinit function to revert what vdevice_init op
+ * does, it should set it to the @vdev->destroy function pointer
+ * @get_hw_queue_size: Get the size of a driver-defined HW queue structure for a
+ * given @viommu corresponding to @queue_type. Driver should
+ * return 0 if HW queue aren't supported accordingly. It is
+ * required for driver to use the HW_QUEUE_STRUCT_SIZE macro
+ * to sanitize the driver-level HW queue structure related
+ * to the core one
+ * @hw_queue_init_phys: Initialize the driver-level structure of a HW queue that
+ * is initialized with its core-level structure that holds
+ * all the info about a guest queue memory.
+ * Driver providing this op indicates that HW accesses the
+ * guest queue memory via physical addresses.
+ * @index carries the logical HW QUEUE ID per vIOMMU in a
+ * guest VM, for a multi-queue model. @base_addr_pa carries
+ * the physical location of the guest queue
+ * If driver has a deinit function to revert what this op
+ * does, it should set it to the @hw_queue->destroy pointer
*/
struct iommufd_viommu_ops {
void (*destroy)(struct iommufd_viommu *viommu);
@@ -128,6 +188,13 @@ struct iommufd_viommu_ops {
const struct iommu_user_data *user_data);
int (*cache_invalidate)(struct iommufd_viommu *viommu,
struct iommu_user_data_array *array);
+ const size_t vdevice_size;
+ int (*vdevice_init)(struct iommufd_vdevice *vdev);
+ size_t (*get_hw_queue_size)(struct iommufd_viommu *viommu,
+ enum iommu_hw_queue_type queue_type);
+ /* AMD's HW will add hw_queue_init simply using @hw_queue->base_addr */
+ int (*hw_queue_init_phys)(struct iommufd_hw_queue *hw_queue, u32 index,
+ phys_addr_t base_addr_pa);
};
#if IS_ENABLED(CONFIG_IOMMUFD)
@@ -171,8 +238,9 @@ static inline void iommufd_access_unpin_pages(struct iommufd_access *access,
{
}
-static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
- void *data, size_t len, unsigned int flags)
+static inline int iommufd_access_rw(struct iommufd_access *access,
+ unsigned long iova, void *data, size_t len,
+ unsigned int flags)
{
return -EOPNOTSUPP;
}
@@ -189,9 +257,16 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
#endif /* CONFIG_IOMMUFD */
#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE)
-struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
- size_t size,
- enum iommufd_object_type type);
+int _iommufd_object_depend(struct iommufd_object *obj_dependent,
+ struct iommufd_object *obj_depended);
+void _iommufd_object_undepend(struct iommufd_object *obj_dependent,
+ struct iommufd_object *obj_depended);
+int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner,
+ phys_addr_t mmio_addr, size_t length,
+ unsigned long *offset);
+void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
+ struct iommufd_object *owner, unsigned long offset);
+struct device *iommufd_vdevice_to_device(struct iommufd_vdevice *vdev);
struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
unsigned long vdev_id);
int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu,
@@ -200,11 +275,36 @@ int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
enum iommu_veventq_type type, void *event_data,
size_t data_len);
#else /* !CONFIG_IOMMUFD_DRIVER_CORE */
-static inline struct iommufd_object *
-_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size,
- enum iommufd_object_type type)
+static inline int _iommufd_object_depend(struct iommufd_object *obj_dependent,
+ struct iommufd_object *obj_depended)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void
+_iommufd_object_undepend(struct iommufd_object *obj_dependent,
+ struct iommufd_object *obj_depended)
+{
+}
+
+static inline int _iommufd_alloc_mmap(struct iommufd_ctx *ictx,
+ struct iommufd_object *owner,
+ phys_addr_t mmio_addr, size_t length,
+ unsigned long *offset)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
+ struct iommufd_object *owner,
+ unsigned long offset)
{
- return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline struct device *
+iommufd_vdevice_to_device(struct iommufd_vdevice *vdev)
+{
+ return NULL;
}
static inline struct device *
@@ -228,21 +328,73 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
}
#endif /* CONFIG_IOMMUFD_DRIVER_CORE */
+#define VIOMMU_STRUCT_SIZE(drv_struct, member) \
+ (sizeof(drv_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \
+ BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_viommu, \
+ ((drv_struct *)NULL)->member)))
+
+#define VDEVICE_STRUCT_SIZE(drv_struct, member) \
+ (sizeof(drv_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \
+ BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_vdevice, \
+ ((drv_struct *)NULL)->member)))
+
+#define HW_QUEUE_STRUCT_SIZE(drv_struct, member) \
+ (sizeof(drv_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \
+ BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_hw_queue, \
+ ((drv_struct *)NULL)->member)))
+
/*
- * Helpers for IOMMU driver to allocate driver structures that will be freed by
- * the iommufd core. The free op will be called prior to freeing the memory.
+ * Helpers for IOMMU driver to build/destroy a dependency between two sibling
+ * structures created by one of the allocators above
*/
-#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops) \
+#define iommufd_hw_queue_depend(dependent, depended, member) \
({ \
- drv_struct *ret; \
+ int ret = -EINVAL; \
\
- static_assert(__same_type(struct iommufd_viommu, \
- ((drv_struct *)NULL)->member)); \
- static_assert(offsetof(drv_struct, member.obj) == 0); \
- ret = (drv_struct *)_iommufd_object_alloc( \
- ictx, sizeof(drv_struct), IOMMUFD_OBJ_VIOMMU); \
- if (!IS_ERR(ret)) \
- ret->member.ops = viommu_ops; \
+ static_assert(__same_type(struct iommufd_hw_queue, \
+ dependent->member)); \
+ static_assert(__same_type(typeof(*dependent), *depended)); \
+ if (!WARN_ON_ONCE(dependent->member.viommu != \
+ depended->member.viommu)) \
+ ret = _iommufd_object_depend(&dependent->member.obj, \
+ &depended->member.obj); \
ret; \
})
+
+#define iommufd_hw_queue_undepend(dependent, depended, member) \
+ ({ \
+ static_assert(__same_type(struct iommufd_hw_queue, \
+ dependent->member)); \
+ static_assert(__same_type(typeof(*dependent), *depended)); \
+ WARN_ON_ONCE(dependent->member.viommu != \
+ depended->member.viommu); \
+ _iommufd_object_undepend(&dependent->member.obj, \
+ &depended->member.obj); \
+ })
+
+/*
+ * Helpers for IOMMU driver to alloc/destroy an mmapable area for a structure.
+ *
+ * To support an mmappable MMIO region, kernel driver must first register it to
+ * iommufd core to allocate an @offset, during a driver-structure initialization
+ * (e.g. viommu_init op). Then, it should report to user space this @offset and
+ * the @length of the MMIO region for mmap syscall.
+ */
+static inline int iommufd_viommu_alloc_mmap(struct iommufd_viommu *viommu,
+ phys_addr_t mmio_addr,
+ size_t length,
+ unsigned long *offset)
+{
+ return _iommufd_alloc_mmap(viommu->ictx, &viommu->obj, mmio_addr,
+ length, offset);
+}
+
+static inline void iommufd_viommu_destroy_mmap(struct iommufd_viommu *viommu,
+ unsigned long offset)
+{
+ _iommufd_destroy_mmap(viommu->ictx, &viommu->obj, offset);
+}
#endif