From 14678219cf4093e897ab353fd78eab7994d1be7d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:35 +0800 Subject: iommu: Introduce domain attachment handle Currently, when attaching a domain to a device or its PASID, domain is stored within the iommu group. It could be retrieved for use during the window between attachment and detachment. With new features introduced, there's a need to store more information than just a domain pointer. This information essentially represents the association between a domain and a device. For example, the SVA code already has a custom struct iommu_sva which represents a bond between sva domain and a PASID of a device. Looking forward, the IOMMUFD needs a place to store the iommufd_device pointer in the core, so that the device object ID could be quickly retrieved in the critical fault handling path. Introduce domain attachment handle that explicitly represents the attachment relationship between a domain and a device or its PASID. Co-developed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240702063444.105814-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- include/linux/iommu.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 17b3f36ad843..afc5af0069bb 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -989,12 +989,22 @@ struct iommu_fwspec { /* ATS is supported */ #define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0) +/* + * An iommu attach handle represents a relationship between an iommu domain + * and a PASID or RID of a device. It is allocated and managed by the component + * that manages the domain and is stored in the iommu group during the time the + * domain is attached. + */ +struct iommu_attach_handle { + struct iommu_domain *domain; +}; + /** * struct iommu_sva - handle to a device-mm bond */ struct iommu_sva { + struct iommu_attach_handle handle; struct device *dev; - struct iommu_domain *domain; struct list_head handle_item; refcount_t users; }; @@ -1052,7 +1062,8 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner); void iommu_device_release_dma_owner(struct device *dev); int iommu_attach_device_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle); void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); struct iommu_domain * @@ -1388,7 +1399,8 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner) } static inline int iommu_attach_device_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid) + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle) { return -ENODEV; } -- cgit v1.3 From 3e7f57d1ef3f5fbed58974fae38d35e430f57d35 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:36 +0800 Subject: iommu: Remove sva handle list The struct sva_iommu represents an association between an SVA domain and a PASID of a device. It's stored in the iommu group's pasid array and also tracked by a list in the per-mm data structure. Removes duplicate tracking of sva_iommu by eliminating the list. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240702063444.105814-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/iommu-priv.h | 3 +++ drivers/iommu/iommu-sva.c | 30 ++++++++++++++++++++---------- drivers/iommu/iommu.c | 31 +++++++++++++++++++++++++++++++ include/linux/iommu.h | 2 -- 4 files changed, 54 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index 5f731d994803..f1536a5ebb0d 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -28,4 +28,7 @@ void iommu_device_unregister_bus(struct iommu_device *iommu, const struct bus_type *bus, struct notifier_block *nb); +struct iommu_attach_handle *iommu_attach_handle_get(struct iommu_group *group, + ioasid_t pasid, + unsigned int type); #endif /* __LINUX_IOMMU_PRIV_H */ diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 0fb923254062..9b7f62517419 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -41,7 +41,6 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de } iommu_mm->pasid = pasid; INIT_LIST_HEAD(&iommu_mm->sva_domains); - INIT_LIST_HEAD(&iommu_mm->sva_handles); /* * Make sure the write to mm->iommu_mm is not reordered in front of * initialization to iommu_mm fields. If it does, readers may see a @@ -69,11 +68,16 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de */ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) { + struct iommu_group *group = dev->iommu_group; + struct iommu_attach_handle *attach_handle; struct iommu_mm_data *iommu_mm; struct iommu_domain *domain; struct iommu_sva *handle; int ret; + if (!group) + return ERR_PTR(-ENODEV); + mutex_lock(&iommu_sva_lock); /* Allocate mm->pasid if necessary. */ @@ -83,12 +87,22 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm goto out_unlock; } - list_for_each_entry(handle, &mm->iommu_mm->sva_handles, handle_item) { - if (handle->dev == dev) { - refcount_inc(&handle->users); - mutex_unlock(&iommu_sva_lock); - return handle; + /* A bond already exists, just take a reference`. */ + attach_handle = iommu_attach_handle_get(group, iommu_mm->pasid, IOMMU_DOMAIN_SVA); + if (!IS_ERR(attach_handle)) { + handle = container_of(attach_handle, struct iommu_sva, handle); + if (attach_handle->domain->mm != mm) { + ret = -EBUSY; + goto out_unlock; } + refcount_inc(&handle->users); + mutex_unlock(&iommu_sva_lock); + return handle; + } + + if (PTR_ERR(attach_handle) != -ENOENT) { + ret = PTR_ERR(attach_handle); + goto out_unlock; } handle = kzalloc(sizeof(*handle), GFP_KERNEL); @@ -99,7 +113,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm /* Search for an existing domain. */ list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) { - handle->handle.domain = domain; ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid, &handle->handle); if (!ret) { @@ -115,7 +128,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm goto out_free_handle; } - handle->handle.domain = domain; ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid, &handle->handle); if (ret) @@ -125,7 +137,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm out: refcount_set(&handle->users, 1); - list_add(&handle->handle_item, &mm->iommu_mm->sva_handles); mutex_unlock(&iommu_sva_lock); handle->dev = dev; return handle; @@ -159,7 +170,6 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) mutex_unlock(&iommu_sva_lock); return; } - list_del(&handle->handle_item); iommu_detach_device_pasid(domain, dev, iommu_mm->pasid); if (--domain->users == 0) { diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index a712b0cc3a1d..7890bd21dff6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3486,3 +3486,34 @@ void iommu_free_global_pasid(ioasid_t pasid) ida_free(&iommu_global_pasid_ida, pasid); } EXPORT_SYMBOL_GPL(iommu_free_global_pasid); + +/** + * iommu_attach_handle_get - Return the attach handle + * @group: the iommu group that domain was attached to + * @pasid: the pasid within the group + * @type: matched domain type, 0 for any match + * + * Return handle or ERR_PTR(-ENOENT) on none, ERR_PTR(-EBUSY) on mismatch. + * + * Return the attach handle to the caller. The life cycle of an iommu attach + * handle is from the time when the domain is attached to the time when the + * domain is detached. Callers are required to synchronize the call of + * iommu_attach_handle_get() with domain attachment and detachment. The attach + * handle can only be used during its life cycle. + */ +struct iommu_attach_handle * +iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type) +{ + struct iommu_attach_handle *handle; + + xa_lock(&group->pasid_array); + handle = xa_load(&group->pasid_array, pasid); + if (!handle) + handle = ERR_PTR(-ENOENT); + else if (type && handle->domain->type != type) + handle = ERR_PTR(-EBUSY); + xa_unlock(&group->pasid_array); + + return handle; +} +EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, IOMMUFD_INTERNAL); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index afc5af0069bb..87ebcc29020e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1005,14 +1005,12 @@ struct iommu_attach_handle { struct iommu_sva { struct iommu_attach_handle handle; struct device *dev; - struct list_head handle_item; refcount_t users; }; struct iommu_mm_data { u32 pasid; struct list_head sva_domains; - struct list_head sva_handles; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, -- cgit v1.3 From 06cdcc32d65759d42c6340700796e2906045b6a5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:37 +0800 Subject: iommu: Add attach handle to struct iopf_group Previously, the domain that a page fault targets is stored in an iopf_group, which represents a minimal set of page faults. With the introduction of attach handle, replace the domain with the handle so that the fault handler can obtain more information as needed when handling the faults. iommu_report_device_fault() is currently used for SVA page faults, which handles the page fault in an internal cycle. The domain is retrieved with iommu_get_domain_for_dev_pasid() if the pasid in the fault message is valid. This doesn't work in IOMMUFD case, where if the pasid table of a device is wholly managed by user space, there is no domain attached to the PASID of the device, and all page faults are forwarded through a NESTING domain attaching to RID. Add a static flag in iommu ops, which indicates if the IOMMU driver supports user-managed PASID tables. In the iopf deliver path, if no attach handle found for the iopf PASID, roll back to RID domain when the IOMMU driver supports this capability. iommu_get_domain_for_dev_pasid() is no longer used and can be removed. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240702063444.105814-4-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/io-pgfault.c | 61 ++++++++++++++++++++++++++-------------------- drivers/iommu/iommu-sva.c | 3 ++- drivers/iommu/iommu.c | 39 ----------------------------- include/linux/iommu.h | 17 +++++-------- 4 files changed, 42 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 06d78fcc79fd..7c9011992d3f 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -59,30 +59,6 @@ void iopf_free_group(struct iopf_group *group) } EXPORT_SYMBOL_GPL(iopf_free_group); -static struct iommu_domain *get_domain_for_iopf(struct device *dev, - struct iommu_fault *fault) -{ - struct iommu_domain *domain; - - if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { - domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0); - if (IS_ERR(domain)) - domain = NULL; - } else { - domain = iommu_get_domain_for_dev(dev); - } - - if (!domain || !domain->iopf_handler) { - dev_warn_ratelimited(dev, - "iopf (pasid %d) without domain attached or handler installed\n", - fault->prm.pasid); - - return NULL; - } - - return domain; -} - /* Non-last request of a group. Postpone until the last one. */ static int report_partial_fault(struct iommu_fault_param *fault_param, struct iommu_fault *fault) @@ -206,20 +182,51 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) if (group == &abort_group) goto err_abort; - group->domain = get_domain_for_iopf(dev, fault); - if (!group->domain) + if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { + group->attach_handle = iommu_attach_handle_get(dev->iommu_group, + fault->prm.pasid, + 0); + if (IS_ERR(group->attach_handle)) { + const struct iommu_ops *ops = dev_iommu_ops(dev); + + if (!ops->user_pasid_table) + goto err_abort; + + /* + * The iommu driver for this device supports user- + * managed PASID table. Therefore page faults for + * any PASID should go through the NESTING domain + * attached to the device RID. + */ + group->attach_handle = + iommu_attach_handle_get(dev->iommu_group, + IOMMU_NO_PASID, + IOMMU_DOMAIN_NESTED); + if (IS_ERR(group->attach_handle)) + goto err_abort; + } + } else { + group->attach_handle = + iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); + if (IS_ERR(group->attach_handle)) + goto err_abort; + } + + if (!group->attach_handle->domain->iopf_handler) goto err_abort; /* * On success iopf_handler must call iopf_group_response() and * iopf_free_group() */ - if (group->domain->iopf_handler(group)) + if (group->attach_handle->domain->iopf_handler(group)) goto err_abort; return; err_abort: + dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n", + fault->prm.pasid); iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE); if (group == &abort_group) __iopf_free_group(group); diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 9b7f62517419..69cde094440e 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -272,7 +272,8 @@ static void iommu_sva_handle_iopf(struct work_struct *work) if (status != IOMMU_PAGE_RESP_SUCCESS) break; - status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm); + status = iommu_sva_handle_mm(&iopf->fault, + group->attach_handle->domain->mm); } iopf_group_response(group, status); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 7890bd21dff6..5a7e874abb36 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3421,45 +3421,6 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, } EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); -/* - * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev - * @dev: the queried device - * @pasid: the pasid of the device - * @type: matched domain type, 0 for any match - * - * This is a variant of iommu_get_domain_for_dev(). It returns the existing - * domain attached to pasid of a device. Callers must hold a lock around this - * function, and both iommu_attach/detach_dev_pasid() whenever a domain of - * type is being manipulated. This API does not internally resolve races with - * attach/detach. - * - * Return: attached domain on success, NULL otherwise. - */ -struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, - ioasid_t pasid, - unsigned int type) -{ - /* Caller must be a probed driver on dev */ - struct iommu_group *group = dev->iommu_group; - struct iommu_attach_handle *handle; - struct iommu_domain *domain = NULL; - - if (!group) - return NULL; - - xa_lock(&group->pasid_array); - handle = xa_load(&group->pasid_array, pasid); - if (handle) - domain = handle->domain; - - if (type && domain && domain->type != type) - domain = NULL; - xa_unlock(&group->pasid_array); - - return domain; -} -EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); - ioasid_t iommu_alloc_global_pasid(struct device *dev) { int ret; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 87ebcc29020e..910aec80886e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -127,7 +127,7 @@ struct iopf_group { /* list node for iommu_fault_param::faults */ struct list_head pending_node; struct work_struct work; - struct iommu_domain *domain; + struct iommu_attach_handle *attach_handle; /* The device's fault data parameter. */ struct iommu_fault_param *fault_param; }; @@ -547,6 +547,10 @@ static inline int __iommu_copy_struct_from_user_array( * @default_domain: If not NULL this will always be set as the default domain. * This should be an IDENTITY/BLOCKED/PLATFORM domain. * Do not use in new drivers. + * @user_pasid_table: IOMMU driver supports user-managed PASID table. There is + * no user domain for each PASID and the I/O page faults are + * forwarded through the user domain attached to the device + * RID. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); @@ -590,6 +594,7 @@ struct iommu_ops { struct iommu_domain *blocked_domain; struct iommu_domain *release_domain; struct iommu_domain *default_domain; + u8 user_pasid_table:1; }; /** @@ -1064,9 +1069,6 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, struct iommu_attach_handle *handle); void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); -struct iommu_domain * -iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, - unsigned int type); ioasid_t iommu_alloc_global_pasid(struct device *dev); void iommu_free_global_pasid(ioasid_t pasid); #else /* CONFIG_IOMMU_API */ @@ -1408,13 +1410,6 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain, { } -static inline struct iommu_domain * -iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, - unsigned int type) -{ - return NULL; -} - static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) { return IOMMU_PASID_INVALID; -- cgit v1.3 From 07838f7fd529c8a6de44b601d4b7057e6c8d36ed Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:40 +0800 Subject: iommufd: Add iommufd fault object An iommufd fault object provides an interface for delivering I/O page faults to user space. These objects are created and destroyed by user space, and they can be associated with or dissociated from hardware page table objects during page table allocation or destruction. User space interacts with the fault object through a file interface. This interface offers a straightforward and efficient way for user space to handle page faults. It allows user space to read fault messages sequentially and respond to them by writing to the same file. The file interface supports reading messages in poll mode, so it's recommended that user space applications use io_uring to enhance read and write efficiency. A fault object can be associated with any iopf-capable iommufd_hw_pgtable during the pgtable's allocation. All I/O page faults triggered by devices when accessing the I/O addresses of an iommufd_hw_pgtable are routed through the fault object to user space. Similarly, user space's responses to these page faults are routed back to the iommu device driver through the same fault object. Link: https://lore.kernel.org/r/20240702063444.105814-7-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/io-pgfault.c | 2 + drivers/iommu/iommufd/Makefile | 1 + drivers/iommu/iommufd/fault.c | 226 ++++++++++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h | 30 +++++ drivers/iommu/iommufd/main.c | 6 + include/linux/iommu.h | 4 + include/uapi/linux/iommufd.h | 18 +++ 7 files changed, 287 insertions(+) create mode 100644 drivers/iommu/iommufd/fault.c (limited to 'include/linux') diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 7c9011992d3f..cd679c13752e 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -110,6 +110,8 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param, list_add(&group->pending_node, &iopf_param->faults); mutex_unlock(&iopf_param->lock); + group->fault_count = list_count_nodes(&group->faults); + return group; } diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index 34b446146961..cf4605962bea 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only iommufd-y := \ device.o \ + fault.o \ hw_pagetable.o \ io_pagetable.o \ ioas.o \ diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c new file mode 100644 index 000000000000..68ff94671d48 --- /dev/null +++ b/drivers/iommu/iommufd/fault.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Intel Corporation + */ +#define pr_fmt(fmt) "iommufd: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../iommu-priv.h" +#include "iommufd_private.h" + +void iommufd_fault_destroy(struct iommufd_object *obj) +{ + struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj); + struct iopf_group *group, *next; + + /* + * The iommufd object's reference count is zero at this point. + * We can be confident that no other threads are currently + * accessing this pointer. Therefore, acquiring the mutex here + * is unnecessary. + */ + list_for_each_entry_safe(group, next, &fault->deliver, node) { + list_del(&group->node); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } +} + +static void iommufd_compose_fault_message(struct iommu_fault *fault, + struct iommu_hwpt_pgfault *hwpt_fault, + struct iommufd_device *idev, + u32 cookie) +{ + hwpt_fault->flags = fault->prm.flags; + hwpt_fault->dev_id = idev->obj.id; + hwpt_fault->pasid = fault->prm.pasid; + hwpt_fault->grpid = fault->prm.grpid; + hwpt_fault->perm = fault->prm.perm; + hwpt_fault->addr = fault->prm.addr; + hwpt_fault->length = 0; + hwpt_fault->cookie = cookie; +} + +static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, + size_t count, loff_t *ppos) +{ + size_t fault_size = sizeof(struct iommu_hwpt_pgfault); + struct iommufd_fault *fault = filep->private_data; + struct iommu_hwpt_pgfault data; + struct iommufd_device *idev; + struct iopf_group *group; + struct iopf_fault *iopf; + size_t done = 0; + int rc = 0; + + if (*ppos || count % fault_size) + return -ESPIPE; + + mutex_lock(&fault->mutex); + while (!list_empty(&fault->deliver) && count > done) { + group = list_first_entry(&fault->deliver, + struct iopf_group, node); + + if (group->fault_count * fault_size > count - done) + break; + + rc = xa_alloc(&fault->response, &group->cookie, group, + xa_limit_32b, GFP_KERNEL); + if (rc) + break; + + idev = to_iommufd_handle(group->attach_handle)->idev; + list_for_each_entry(iopf, &group->faults, list) { + iommufd_compose_fault_message(&iopf->fault, + &data, idev, + group->cookie); + if (copy_to_user(buf + done, &data, fault_size)) { + xa_erase(&fault->response, group->cookie); + rc = -EFAULT; + break; + } + done += fault_size; + } + + list_del(&group->node); + } + mutex_unlock(&fault->mutex); + + return done == 0 ? rc : done; +} + +static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf, + size_t count, loff_t *ppos) +{ + size_t response_size = sizeof(struct iommu_hwpt_page_response); + struct iommufd_fault *fault = filep->private_data; + struct iommu_hwpt_page_response response; + struct iopf_group *group; + size_t done = 0; + int rc = 0; + + if (*ppos || count % response_size) + return -ESPIPE; + + mutex_lock(&fault->mutex); + while (count > done) { + rc = copy_from_user(&response, buf + done, response_size); + if (rc) + break; + + group = xa_erase(&fault->response, response.cookie); + if (!group) { + rc = -EINVAL; + break; + } + + iopf_group_response(group, response.code); + iopf_free_group(group); + done += response_size; + } + mutex_unlock(&fault->mutex); + + return done == 0 ? rc : done; +} + +static __poll_t iommufd_fault_fops_poll(struct file *filep, + struct poll_table_struct *wait) +{ + struct iommufd_fault *fault = filep->private_data; + __poll_t pollflags = EPOLLOUT; + + poll_wait(filep, &fault->wait_queue, wait); + mutex_lock(&fault->mutex); + if (!list_empty(&fault->deliver)) + pollflags |= EPOLLIN | EPOLLRDNORM; + mutex_unlock(&fault->mutex); + + return pollflags; +} + +static int iommufd_fault_fops_release(struct inode *inode, struct file *filep) +{ + struct iommufd_fault *fault = filep->private_data; + + refcount_dec(&fault->obj.users); + iommufd_ctx_put(fault->ictx); + return 0; +} + +static const struct file_operations iommufd_fault_fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .read = iommufd_fault_fops_read, + .write = iommufd_fault_fops_write, + .poll = iommufd_fault_fops_poll, + .release = iommufd_fault_fops_release, + .llseek = no_llseek, +}; + +int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) +{ + struct iommu_fault_alloc *cmd = ucmd->cmd; + struct iommufd_fault *fault; + struct file *filep; + int fdno; + int rc; + + if (cmd->flags) + return -EOPNOTSUPP; + + fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT); + if (IS_ERR(fault)) + return PTR_ERR(fault); + + fault->ictx = ucmd->ictx; + INIT_LIST_HEAD(&fault->deliver); + xa_init_flags(&fault->response, XA_FLAGS_ALLOC1); + mutex_init(&fault->mutex); + init_waitqueue_head(&fault->wait_queue); + + filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops, + fault, O_RDWR); + if (IS_ERR(filep)) { + rc = PTR_ERR(filep); + goto out_abort; + } + + refcount_inc(&fault->obj.users); + iommufd_ctx_get(fault->ictx); + fault->filep = filep; + + fdno = get_unused_fd_flags(O_CLOEXEC); + if (fdno < 0) { + rc = fdno; + goto out_fput; + } + + cmd->out_fault_id = fault->obj.id; + cmd->out_fault_fd = fdno; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_put_fdno; + iommufd_object_finalize(ucmd->ictx, &fault->obj); + + fd_install(fdno, fault->filep); + + return 0; +out_put_fdno: + put_unused_fd(fdno); +out_fput: + fput(filep); + refcount_dec(&fault->obj.users); + iommufd_ctx_put(fault->ictx); +out_abort: + iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj); + + return rc; +} diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 991f864d1f9b..c8a4519f1405 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -128,6 +128,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_HWPT_NESTED, IOMMUFD_OBJ_IOAS, IOMMUFD_OBJ_ACCESS, + IOMMUFD_OBJ_FAULT, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -426,6 +427,35 @@ void iopt_remove_access(struct io_pagetable *iopt, u32 iopt_access_list_id); void iommufd_access_destroy_object(struct iommufd_object *obj); +/* + * An iommufd_fault object represents an interface to deliver I/O page faults + * to the user space. These objects are created/destroyed by the user space and + * associated with hardware page table objects during page-table allocation. + */ +struct iommufd_fault { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct file *filep; + + /* The lists of outstanding faults protected by below mutex. */ + struct mutex mutex; + struct list_head deliver; + struct xarray response; + + struct wait_queue_head wait_queue; +}; + +struct iommufd_attach_handle { + struct iommu_attach_handle handle; + struct iommufd_device *idev; +}; + +/* Convert an iommu attach handle to iommufd handle. */ +#define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle) + +int iommufd_fault_alloc(struct iommufd_ucmd *ucmd); +void iommufd_fault_destroy(struct iommufd_object *obj); + #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); void iommufd_selftest_destroy(struct iommufd_object *obj); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 39b32932c61e..83bbd7c5d160 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -319,6 +319,7 @@ static int iommufd_option(struct iommufd_ucmd *ucmd) union ucmd_buffer { struct iommu_destroy destroy; + struct iommu_fault_alloc fault; struct iommu_hw_info info; struct iommu_hwpt_alloc hwpt; struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; @@ -355,6 +356,8 @@ struct iommufd_ioctl_op { } static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), + IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc, struct iommu_fault_alloc, + out_fault_fd), IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info, __reserved), IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, @@ -513,6 +516,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { .destroy = iommufd_hwpt_nested_destroy, .abort = iommufd_hwpt_nested_abort, }, + [IOMMUFD_OBJ_FAULT] = { + .destroy = iommufd_fault_destroy, + }, #ifdef CONFIG_IOMMUFD_TEST [IOMMUFD_OBJ_SELFTEST] = { .destroy = iommufd_selftest_destroy, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 910aec80886e..73bc3aee95a1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -124,12 +124,16 @@ struct iopf_fault { struct iopf_group { struct iopf_fault last_fault; struct list_head faults; + size_t fault_count; /* list node for iommu_fault_param::faults */ struct list_head pending_node; struct work_struct work; struct iommu_attach_handle *attach_handle; /* The device's fault data parameter. */ struct iommu_fault_param *fault_param; + /* Used by handler provider to hook the group on its own lists. */ + struct list_head node; + u32 cookie; }; /** diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 4d89ed97b533..70b8a38fcd46 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -50,6 +50,7 @@ enum { IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, IOMMUFD_CMD_HWPT_INVALIDATE, + IOMMUFD_CMD_FAULT_QUEUE_ALLOC, }; /** @@ -775,4 +776,21 @@ struct iommu_hwpt_page_response { __u32 cookie; __u32 code; }; + +/** + * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC) + * @size: sizeof(struct iommu_fault_alloc) + * @flags: Must be 0 + * @out_fault_id: The ID of the new FAULT + * @out_fault_fd: The fd of the new FAULT + * + * Explicitly allocate a fault handling object. + */ +struct iommu_fault_alloc { + __u32 size; + __u32 flags; + __u32 out_fault_id; + __u32 out_fault_fd; +}; +#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC) #endif -- cgit v1.3