Diffstat (limited to 'drivers/iommu/iommufd')
-rw-r--r--  drivers/iommu/iommufd/Kconfig            |   1
-rw-r--r--  drivers/iommu/iommufd/driver.c           |   2
-rw-r--r--  drivers/iommu/iommufd/io_pagetable.c     |  90
-rw-r--r--  drivers/iommu/iommufd/io_pagetable.h     |  54
-rw-r--r--  drivers/iommu/iommufd/ioas.c             |  12
-rw-r--r--  drivers/iommu/iommufd/iommufd_private.h  |  18
-rw-r--r--  drivers/iommu/iommufd/iommufd_test.h     |  21
-rw-r--r--  drivers/iommu/iommufd/iova_bitmap.c      |   5
-rw-r--r--  drivers/iommu/iommufd/main.c             |  10
-rw-r--r--  drivers/iommu/iommufd/pages.c            | 414
-rw-r--r--  drivers/iommu/iommufd/selftest.c         | 569
11 files changed, 856 insertions(+), 340 deletions(-)
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index 2beeb4f60ee5..eae3f03629b0 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -41,6 +41,7 @@ config IOMMUFD_TEST
 	depends on DEBUG_KERNEL
 	depends on FAULT_INJECTION
 	depends on RUNTIME_TESTING_MENU
+	depends on IOMMU_PT_AMDV1
 	select IOMMUFD_DRIVER
 	default n
 	help
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
index 6f1010da221c..21d4a35538f6 100644
--- a/drivers/iommu/iommufd/driver.c
+++ b/drivers/iommu/iommufd/driver.c
@@ -161,8 +161,8 @@ int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
 		vevent = &veventq->lost_events_header;
 		goto out_set_header;
 	}
-	memcpy(vevent->event_data, event_data, data_len);
 	vevent->data_len = data_len;
+	memcpy(vevent->event_data, event_data, data_len);
 	veventq->num_events++;
 
 out_set_header:
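The driver.c reorder pairs with the iommufd_private.h hunk further down that moves lost_events_header (which ends in a flexible-array member) to the end of struct iommufd_veventq. A likely-intended detail, stated here as an assumption: when a flexible array is annotated with __counted_by(), a fortified memcpy() checks the copy against the current counter value, so the counter must be written before the data. A minimal sketch of the pattern, with hypothetical names:

	struct vevent {
		u32 data_len;
		u8 event_data[] __counted_by(data_len);
	};

	static struct vevent *vevent_alloc(const void *data, u32 len)
	{
		struct vevent *ve;

		ve = kmalloc(struct_size(ve, event_data, len), GFP_KERNEL);
		if (!ve)
			return NULL;
		ve->data_len = len;			/* counter first ... */
		memcpy(ve->event_data, data, len);	/* ... then the copy */
		return ve;
	}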
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index c0360c450880..54cf4d856179 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -8,8 +8,10 @@
  * The datastructure uses the iopt_pages to optimize the storage of the PFNs
  * between the domains and xarray.
  */
+#include <linux/dma-buf.h>
 #include <linux/err.h>
 #include <linux/errno.h>
+#include <linux/file.h>
 #include <linux/iommu.h>
 #include <linux/iommufd.h>
 #include <linux/lockdep.h>
@@ -284,6 +286,9 @@ static int iopt_alloc_area_pages(struct io_pagetable *iopt,
 		case IOPT_ADDRESS_FILE:
 			start = elm->start_byte + elm->pages->start;
 			break;
+		case IOPT_ADDRESS_DMABUF:
+			start = elm->start_byte + elm->pages->dmabuf.start;
+			break;
 		}
 		rc = iopt_alloc_iova(iopt, dst_iova, start, length);
 		if (rc)
@@ -468,25 +473,53 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
  * @iopt: io_pagetable to act on
  * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
  *        the chosen iova on output. Otherwise is the iova to map to on input
- * @file: file to map
+ * @fd: fdno of a file to map
 * @start: map file starting at this byte offset
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 */
 int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
-			unsigned long *iova, struct file *file,
-			unsigned long start, unsigned long length,
-			int iommu_prot, unsigned int flags)
+			unsigned long *iova, int fd, unsigned long start,
+			unsigned long length, int iommu_prot,
+			unsigned int flags)
 {
 	struct iopt_pages *pages;
+	struct dma_buf *dmabuf;
+	unsigned long start_byte;
+	unsigned long last;
+
+	if (!length)
+		return -EINVAL;
+	if (check_add_overflow(start, length - 1, &last))
+		return -EOVERFLOW;
+
+	start_byte = start - ALIGN_DOWN(start, PAGE_SIZE);
+	dmabuf = dma_buf_get(fd);
+	if (!IS_ERR(dmabuf)) {
+		pages = iopt_alloc_dmabuf_pages(ictx, dmabuf, start_byte, start,
+						length,
+						iommu_prot & IOMMU_WRITE);
+		if (IS_ERR(pages)) {
+			dma_buf_put(dmabuf);
+			return PTR_ERR(pages);
+		}
+	} else {
+		struct file *file;
+
+		file = fget(fd);
+		if (!file)
+			return -EBADF;
+
+		pages = iopt_alloc_file_pages(file, start_byte, start, length,
+					      iommu_prot & IOMMU_WRITE);
+		fput(file);
+		if (IS_ERR(pages))
+			return PTR_ERR(pages);
+	}
 
-	pages = iopt_alloc_file_pages(file, start, length,
-				      iommu_prot & IOMMU_WRITE);
-	if (IS_ERR(pages))
-		return PTR_ERR(pages);
 	return iopt_map_common(ictx, iopt, pages, iova, length,
-			       start - pages->start, iommu_prot, flags);
+			       start_byte, iommu_prot, flags);
 }
 
 struct iova_bitmap_fn_arg {
@@ -707,7 +740,8 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
 	struct iopt_area *area;
 	unsigned long unmapped_bytes = 0;
 	unsigned int tries = 0;
-	int rc = -ENOENT;
+	/* If there are no mapped entries then success */
+	int rc = 0;
 
 	/*
	 * The domains_rwsem must be held in read mode any time any area->pages
@@ -777,8 +811,6 @@ again:
 		down_write(&iopt->iova_rwsem);
 	}
 
-	if (unmapped_bytes)
-		rc = 0;
 
 out_unlock_iova:
 	up_write(&iopt->iova_rwsem);
@@ -815,13 +847,8 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
 
 int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
 {
-	int rc;
-
-	rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
 	/* If the IOVAs are empty then unmap all succeeds */
-	if (rc == -ENOENT)
-		return 0;
-	return rc;
+	return iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
 }
 
 /* The caller must always free all the nodes in the allowed_iova rb_root. */
@@ -967,9 +994,15 @@ static void iopt_unfill_domain(struct io_pagetable *iopt,
 			WARN_ON(!area->storage_domain);
 			if (area->storage_domain == domain)
 				area->storage_domain = storage_domain;
+			if (iopt_is_dmabuf(pages)) {
+				if (!iopt_dmabuf_revoked(pages))
+					iopt_area_unmap_domain(area, domain);
+				iopt_dmabuf_untrack_domain(pages, area, domain);
+			}
 			mutex_unlock(&pages->mutex);
 
-			iopt_area_unmap_domain(area, domain);
+			if (!iopt_is_dmabuf(pages))
+				iopt_area_unmap_domain(area, domain);
 		}
 		return;
 	}
@@ -986,6 +1019,8 @@ static void iopt_unfill_domain(struct io_pagetable *iopt,
 		WARN_ON(area->storage_domain != domain);
 		area->storage_domain = NULL;
 		iopt_area_unfill_domain(area, pages, domain);
+		if (iopt_is_dmabuf(pages))
+			iopt_dmabuf_untrack_domain(pages, area, domain);
 		mutex_unlock(&pages->mutex);
 	}
 }
@@ -1015,10 +1050,16 @@ static int iopt_fill_domain(struct io_pagetable *iopt,
 		if (!pages)
 			continue;
 
-		mutex_lock(&pages->mutex);
+		guard(mutex)(&pages->mutex);
+		if (iopt_is_dmabuf(pages)) {
+			rc = iopt_dmabuf_track_domain(pages, area, domain);
+			if (rc)
+				goto out_unfill;
+		}
 		rc = iopt_area_fill_domain(area, domain);
 		if (rc) {
-			mutex_unlock(&pages->mutex);
+			if (iopt_is_dmabuf(pages))
+				iopt_dmabuf_untrack_domain(pages, area, domain);
 			goto out_unfill;
 		}
 		if (!area->storage_domain) {
@@ -1027,7 +1068,6 @@ static int iopt_fill_domain(struct io_pagetable *iopt,
 			interval_tree_insert(&area->pages_node,
 					     &pages->domains_itree);
 		}
-		mutex_unlock(&pages->mutex);
 	}
 	return 0;
 
@@ -1048,6 +1088,8 @@ out_unfill:
 			area->storage_domain = NULL;
 		}
 		iopt_area_unfill_domain(area, pages, domain);
+		if (iopt_is_dmabuf(pages))
+			iopt_dmabuf_untrack_domain(pages, area, domain);
 		mutex_unlock(&pages->mutex);
 	}
 	return rc;
@@ -1258,6 +1300,10 @@ static int iopt_area_split(struct iopt_area *area, unsigned long iova)
 	if (!pages || area->prevent_access)
 		return -EBUSY;
 
+	/* Maintaining the domains_itree below is a bit complicated */
+	if (iopt_is_dmabuf(pages))
+		return -EOPNOTSUPP;
+
 	if (new_start & (alignment - 1) ||
 	    iopt_area_start_byte(area, new_start) & (alignment - 1))
 		return -EINVAL;
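With this change IOMMU_IOAS_MAP_FILE accepts any fd: iopt_map_file_pages() first probes it with dma_buf_get() and only falls back to fget() when it is not a dmabuf. A rough userspace sketch (field names follow the existing iommu_ioas_map_file uAPI; error handling abbreviated):

	struct iommu_ioas_map_file cmd = {
		.size = sizeof(cmd),
		.flags = IOMMU_IOAS_MAP_READABLE | IOMMU_IOAS_MAP_WRITEABLE,
		.ioas_id = ioas_id,
		.fd = fd,		/* a dmabuf fd or a regular file fd */
		.start = 0,
		.length = length,	/* must be non-zero and not overflow */
	};

	/* Without IOMMU_IOAS_MAP_FIXED_IOVA the kernel picks the IOVA */
	if (ioctl(iommufd, IOMMU_IOAS_MAP_FILE, &cmd))
		err(1, "IOMMU_IOAS_MAP_FILE");
	printf("mapped at iova 0x%llx\n", (unsigned long long)cmd.iova);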
diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
index b6064f4ce4af..14cd052fd320 100644
--- a/drivers/iommu/iommufd/io_pagetable.h
+++ b/drivers/iommu/iommufd/io_pagetable.h
@@ -5,6 +5,7 @@
 #ifndef __IO_PAGETABLE_H
 #define __IO_PAGETABLE_H
 
+#include <linux/dma-buf.h>
 #include <linux/interval_tree.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
@@ -69,6 +70,16 @@ void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages,
 void iopt_area_unmap_domain(struct iopt_area *area,
 			    struct iommu_domain *domain);
 
+int iopt_dmabuf_track_domain(struct iopt_pages *pages, struct iopt_area *area,
+			     struct iommu_domain *domain);
+void iopt_dmabuf_untrack_domain(struct iopt_pages *pages,
+				struct iopt_area *area,
+				struct iommu_domain *domain);
+int iopt_dmabuf_track_all_domains(struct iopt_area *area,
+				  struct iopt_pages *pages);
+void iopt_dmabuf_untrack_all_domains(struct iopt_area *area,
+				     struct iopt_pages *pages);
+
 static inline unsigned long iopt_area_index(struct iopt_area *area)
 {
 	return area->pages_node.start;
@@ -179,7 +190,22 @@ enum {
 
 enum iopt_address_type {
 	IOPT_ADDRESS_USER = 0,
-	IOPT_ADDRESS_FILE = 1,
+	IOPT_ADDRESS_FILE,
+	IOPT_ADDRESS_DMABUF,
+};
+
+struct iopt_pages_dmabuf_track {
+	struct iommu_domain *domain;
+	struct iopt_area *area;
+	struct list_head elm;
+};
+
+struct iopt_pages_dmabuf {
+	struct dma_buf_attachment *attach;
+	struct dma_buf_phys_vec phys;
+	/* Always PAGE_SIZE aligned */
+	unsigned long start;
+	struct list_head tracker;
 };
 
 /*
@@ -209,6 +235,8 @@ struct iopt_pages {
 			struct file *file;
 			unsigned long start;
 		};
+		/* IOPT_ADDRESS_DMABUF */
+		struct iopt_pages_dmabuf dmabuf;
 	};
 	bool writable:1;
 	u8 account_mode;
@@ -220,10 +248,32 @@ struct iopt_pages {
 	struct rb_root_cached domains_itree;
 };
 
+static inline bool iopt_is_dmabuf(struct iopt_pages *pages)
+{
+	if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+		return false;
+	return pages->type == IOPT_ADDRESS_DMABUF;
+}
+
+static inline bool iopt_dmabuf_revoked(struct iopt_pages *pages)
+{
+	lockdep_assert_held(&pages->mutex);
+	if (iopt_is_dmabuf(pages))
+		return pages->dmabuf.phys.len == 0;
+	return false;
+}
+
 struct iopt_pages *iopt_alloc_user_pages(void __user *uptr,
 					 unsigned long length, bool writable);
-struct iopt_pages *iopt_alloc_file_pages(struct file *file, unsigned long start,
+struct iopt_pages *iopt_alloc_file_pages(struct file *file,
+					 unsigned long start_byte,
+					 unsigned long start,
 					 unsigned long length, bool writable);
+struct iopt_pages *iopt_alloc_dmabuf_pages(struct iommufd_ctx *ictx,
+					   struct dma_buf *dmabuf,
+					   unsigned long start_byte,
+					   unsigned long start,
+					   unsigned long length, bool writable);
 void iopt_release_pages(struct kref *kref);
 static inline void iopt_put_pages(struct iopt_pages *pages)
 {
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
index 1542c5fd10a8..f4721afedadc 100644
--- a/drivers/iommu/iommufd/ioas.c
+++ b/drivers/iommu/iommufd/ioas.c
@@ -207,7 +207,6 @@ int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd)
 	unsigned long iova = cmd->iova;
 	struct iommufd_ioas *ioas;
 	unsigned int flags = 0;
-	struct file *file;
 	int rc;
 
 	if (cmd->flags &
@@ -229,11 +228,7 @@ int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd)
 	if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA))
 		flags = IOPT_ALLOC_IOVA;
 
-	file = fget(cmd->fd);
-	if (!file)
-		return -EBADF;
-
-	rc = iopt_map_file_pages(ucmd->ictx, &ioas->iopt, &iova, file,
+	rc = iopt_map_file_pages(ucmd->ictx, &ioas->iopt, &iova, cmd->fd,
 				 cmd->start, cmd->length,
 				 conv_iommu_prot(cmd->flags), flags);
 	if (rc)
@@ -243,7 +238,6 @@ int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd)
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 out_put:
 	iommufd_put_object(ucmd->ictx, &ioas->obj);
-	fput(file);
 	return rc;
 }
 
@@ -367,6 +361,10 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
 				       &unmapped);
 		if (rc)
 			goto out_put;
+		if (!unmapped) {
+			rc = -ENOENT;
+			goto out_put;
+		}
 	}
 
 	cmd->length = unmapped;
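The unmap error semantics move up a layer here: iopt_unmap_iova_range() now returns 0 when the range contains nothing, and only the IOMMU_IOAS_UNMAP ioctl converts "nothing unmapped" back into -ENOENT, so internal callers such as iopt_unmap_all() no longer filter the error themselves. A condensed view of the resulting contract (a sketch, not verbatim kernel code):

	/* ioctl layer: uAPI keeps its historical error code */
	rc = iopt_unmap_iova(&ioas->iopt, cmd->iova, cmd->length, &unmapped);
	if (rc)
		goto out_put;
	if (!unmapped) {
		rc = -ENOENT;
		goto out_put;
	}

	/* internal layer: an empty io_pagetable is a successful no-op */
	rc = iopt_unmap_all(iopt, NULL);	/* can no longer be -ENOENT */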
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 627f9b78483a..eb6d1a70f673 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -19,6 +19,8 @@ struct iommu_domain;
 struct iommu_group;
 struct iommu_option;
 struct iommufd_device;
+struct dma_buf_attachment;
+struct dma_buf_phys_vec;
 
 struct iommufd_sw_msi_map {
 	struct list_head sw_msi_item;
@@ -108,7 +110,7 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
 			unsigned long length, int iommu_prot,
 			unsigned int flags);
 int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
-			unsigned long *iova, struct file *file,
+			unsigned long *iova, int fd,
 			unsigned long start, unsigned long length,
 			int iommu_prot, unsigned int flags);
 int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
@@ -504,6 +506,8 @@ void iommufd_device_pre_destroy(struct iommufd_object *obj);
 void iommufd_device_destroy(struct iommufd_object *obj);
 int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);
 
+struct device *iommufd_global_device(void);
+
 struct iommufd_access {
 	struct iommufd_object obj;
 	struct iommufd_ctx *ictx;
@@ -614,7 +618,6 @@ struct iommufd_veventq {
 	struct iommufd_eventq common;
 	struct iommufd_viommu *viommu;
 	struct list_head node; /* for iommufd_viommu::veventqs */
-	struct iommufd_vevent lost_events_header;
 
 	enum iommu_veventq_type type;
 	unsigned int depth;
@@ -622,6 +625,9 @@ struct iommufd_veventq {
 	/* Use common.lock for protection */
 	u32 num_events;
 	u32 sequence;
+
+	/* Must be last as it ends in a flexible-array member. */
+	struct iommufd_vevent lost_events_header;
 };
 
 static inline struct iommufd_veventq *
@@ -711,6 +717,8 @@ bool iommufd_should_fail(void);
 int __init iommufd_test_init(void);
 void iommufd_test_exit(void);
 bool iommufd_selftest_is_mock_dev(struct device *dev);
+int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
+				     struct dma_buf_phys_vec *phys);
 #else
 static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
 						 unsigned int ioas_id,
@@ -732,5 +740,11 @@ static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
 {
 	return false;
 }
+static inline int
+iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
+				 struct dma_buf_phys_vec *phys)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 #endif
diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h
index 8fc618b2bcf9..73e73e1ec158 100644
--- a/drivers/iommu/iommufd/iommufd_test.h
+++ b/drivers/iommu/iommufd/iommufd_test.h
@@ -29,11 +29,22 @@ enum {
 	IOMMU_TEST_OP_PASID_REPLACE,
 	IOMMU_TEST_OP_PASID_DETACH,
 	IOMMU_TEST_OP_PASID_CHECK_HWPT,
+	IOMMU_TEST_OP_DMABUF_GET,
+	IOMMU_TEST_OP_DMABUF_REVOKE,
 };
 
 enum {
+	MOCK_IOMMUPT_DEFAULT = 0,
+	MOCK_IOMMUPT_HUGE,
+	MOCK_IOMMUPT_AMDV1,
+};
+
+/* These values are true for MOCK_IOMMUPT_DEFAULT */
+enum {
 	MOCK_APERTURE_START = 1UL << 24,
 	MOCK_APERTURE_LAST = (1UL << 31) - 1,
+	MOCK_PAGE_SIZE = 2048,
+	MOCK_HUGE_PAGE_SIZE = 512 * MOCK_PAGE_SIZE,
 };
 
 enum {
@@ -52,7 +63,6 @@ enum {
 
 enum {
 	MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0,
-	MOCK_FLAGS_DEVICE_HUGE_IOVA = 1 << 1,
 	MOCK_FLAGS_DEVICE_PASID = 1 << 2,
 };
 
@@ -176,6 +186,14 @@ struct iommu_test_cmd {
 			__u32 hwpt_id;
 			/* @id is stdev_id */
 		} pasid_check;
+		struct {
+			__u32 length;
+			__u32 open_flags;
+		} dmabuf_get;
+		struct {
+			__s32 dmabuf_fd;
+			__u32 revoked;
+		} dmabuf_revoke;
 	};
 	__u32 last;
 };
@@ -205,6 +223,7 @@ struct iommu_test_hw_info {
 */
 struct iommu_hwpt_selftest {
 	__u32 iotlb;
+	__u32 pagetable_type;
 };
 
 /* Should not be equal to any defined value in enum iommu_hwpt_invalidate_data_type */
diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c
index 4514575818fc..b5b67a9d3fb3 100644
--- a/drivers/iommu/iommufd/iova_bitmap.c
+++ b/drivers/iommu/iommufd/iova_bitmap.c
@@ -130,9 +130,8 @@ struct iova_bitmap {
 static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap,
 						 unsigned long iova)
 {
-	unsigned long pgsize = 1UL << bitmap->mapped.pgshift;
-
-	return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize);
+	return (iova >> bitmap->mapped.pgshift) /
+	       BITS_PER_TYPE(*bitmap->bitmap);
 }
 
 /*
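The iova_bitmap change replaces a division by (BITS_PER_TYPE * pgsize) with a shift followed by a division, which yields the same index without first forming a product that can overflow for large page sizes. A worked example, assuming u64 bitmap words (so BITS_PER_TYPE == 64):

	/* pgshift = 16 (64KiB pages), iova = 0x12340000 */
	old = 0x12340000UL / (64 * 65536);	/* 305397760 / 4194304 = 72 */
	new = (0x12340000UL >> 16) / 64;	/*       4660 / 64     = 72 */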
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index ce775fbbae94..5cc4b08c25f5 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -751,6 +751,15 @@ static struct miscdevice vfio_misc_dev = {
 	.mode = 0666,
 };
 
+/*
+ * Used only by DMABUF, returns a valid struct device to use as a dummy struct
+ * device for attachment.
+ */
+struct device *iommufd_global_device(void)
+{
+	return iommu_misc_dev.this_device;
+}
+
 static int __init iommufd_init(void)
 {
 	int ret;
@@ -794,5 +803,6 @@ MODULE_ALIAS("devname:vfio/vfio");
 #endif
 MODULE_IMPORT_NS("IOMMUFD_INTERNAL");
 MODULE_IMPORT_NS("IOMMUFD");
+MODULE_IMPORT_NS("DMA_BUF");
 MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
 MODULE_LICENSE("GPL");
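iommufd_global_device() exists because dma_buf_dynamic_attach() requires a struct device even though iommufd maps the buffer through IOMMU page tables rather than issuing DMA from that device itself. A minimal sketch of such a revocable importer, mirroring what pages.c below does (everything except the dma-buf API names is hypothetical):

	static void my_move_notify(struct dma_buf_attachment *attach)
	{
		/* called with dmabuf->resv held: tear down our mappings */
	}

	static const struct dma_buf_attach_ops my_attach_ops = {
		.allow_peer2peer = true,
		.move_notify = my_move_notify,
	};

	attach = dma_buf_dynamic_attach(dmabuf, iommufd_global_device(),
					&my_attach_ops, my_importer_priv);
	if (IS_ERR(attach))
		return PTR_ERR(attach);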
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index c3433b845561..dbe51ecb9a20 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -45,6 +45,8 @@
 * last_iova + 1 can overflow. An iopt_pages index will always be much less than
 * ULONG_MAX so last_index + 1 cannot overflow.
 */
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
 #include <linux/file.h>
 #include <linux/highmem.h>
 #include <linux/iommu.h>
@@ -53,6 +55,7 @@
 #include <linux/overflow.h>
 #include <linux/slab.h>
 #include <linux/sched/mm.h>
+#include <linux/vfio_pci_core.h>
 
 #include "double_span.h"
 #include "io_pagetable.h"
@@ -258,6 +261,11 @@ static struct iopt_area *iopt_pages_find_domain_area(struct iopt_pages *pages,
 	return container_of(node, struct iopt_area, pages_node);
 }
 
+enum batch_kind {
+	BATCH_CPU_MEMORY = 0,
+	BATCH_MMIO,
+};
+
 /*
 * A simple datastructure to hold a vector of PFNs, optimized for contiguous
 * PFNs. This is used as a temporary holding memory for shuttling pfns from one
@@ -271,7 +279,9 @@ struct pfn_batch {
 	unsigned int array_size;
 	unsigned int end;
 	unsigned int total_pfns;
+	enum batch_kind kind;
 };
+enum { MAX_NPFNS = type_max(typeof(((struct pfn_batch *)0)->npfns[0])) };
 
 static void batch_clear(struct pfn_batch *batch)
 {
@@ -348,11 +358,17 @@ static void batch_destroy(struct pfn_batch *batch, void *backup)
 }
 
 static bool batch_add_pfn_num(struct pfn_batch *batch, unsigned long pfn,
-			      u32 nr)
+			      u32 nr, enum batch_kind kind)
 {
-	const unsigned int MAX_NPFNS = type_max(typeof(*batch->npfns));
 	unsigned int end = batch->end;
 
+	if (batch->kind != kind) {
+		/* One kind per batch */
+		if (batch->end != 0)
+			return false;
+		batch->kind = kind;
+	}
+
 	if (end && pfn == batch->pfns[end - 1] + batch->npfns[end - 1] &&
 	    nr <= MAX_NPFNS - batch->npfns[end - 1]) {
 		batch->npfns[end - 1] += nr;
@@ -379,7 +395,7 @@ static void batch_remove_pfn_num(struct pfn_batch *batch, unsigned long nr)
 /* true if the pfn was added, false otherwise */
 static bool batch_add_pfn(struct pfn_batch *batch, unsigned long pfn)
 {
-	return batch_add_pfn_num(batch, pfn, 1);
+	return batch_add_pfn_num(batch, pfn, 1, BATCH_CPU_MEMORY);
 }
 
 /*
@@ -492,6 +508,7 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain,
 {
 	bool disable_large_pages = area->iopt->disable_large_pages;
 	unsigned long last_iova = iopt_area_last_iova(area);
+	int iommu_prot = area->iommu_prot;
 	unsigned int page_offset = 0;
 	unsigned long start_iova;
 	unsigned long next_iova;
 	unsigned long iova;
 	int rc;
 
+	if (batch->kind == BATCH_MMIO) {
+		iommu_prot &= ~IOMMU_CACHE;
+		iommu_prot |= IOMMU_MMIO;
+	}
+
 	/* The first index might be a partial page */
 	if (start_index == iopt_area_index(area))
 		page_offset = area->page_offset;
@@ -512,11 +534,11 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain,
 			rc = batch_iommu_map_small(
 				domain, iova,
 				PFN_PHYS(batch->pfns[cur]) + page_offset,
-				next_iova - iova, area->iommu_prot);
+				next_iova - iova, iommu_prot);
 		else
 			rc = iommu_map(domain, iova,
 				       PFN_PHYS(batch->pfns[cur]) + page_offset,
-				       next_iova - iova, area->iommu_prot,
+				       next_iova - iova, iommu_prot,
 				       GFP_KERNEL_ACCOUNT);
 		if (rc)
 			goto err_unmap;
@@ -652,7 +674,7 @@ static int batch_from_folios(struct pfn_batch *batch, struct folio ***folios_p,
 		nr = min(nr, npages);
 		npages -= nr;
 
-		if (!batch_add_pfn_num(batch, pfn, nr))
+		if (!batch_add_pfn_num(batch, pfn, nr, BATCH_CPU_MEMORY))
 			break;
 		if (nr > 1) {
 			rc = folio_add_pins(folio, nr - 1);
@@ -1054,6 +1076,41 @@ static int pfn_reader_user_update_pinned(struct pfn_reader_user *user,
 	return iopt_pages_update_pinned(pages, npages, inc, user);
 }
 
+struct pfn_reader_dmabuf {
+	struct dma_buf_phys_vec phys;
+	unsigned long start_offset;
+};
+
+static int pfn_reader_dmabuf_init(struct pfn_reader_dmabuf *dmabuf,
+				  struct iopt_pages *pages)
+{
+	/* Callers must not get here if the dmabuf was already revoked */
+	if (WARN_ON(iopt_dmabuf_revoked(pages)))
+		return -EINVAL;
+
+	dmabuf->phys = pages->dmabuf.phys;
+	dmabuf->start_offset = pages->dmabuf.start;
+	return 0;
+}
+
+static int pfn_reader_fill_dmabuf(struct pfn_reader_dmabuf *dmabuf,
+				  struct pfn_batch *batch,
+				  unsigned long start_index,
+				  unsigned long last_index)
+{
+	unsigned long start = dmabuf->start_offset + start_index * PAGE_SIZE;
+
+	/*
+	 * start/last_index and start are all PAGE_SIZE aligned, the batch is
+	 * always filled using page size aligned PFNs just like the other
+	 * types. If the dmabuf has been sliced on a sub page offset then the
+	 * common batch to domain code will adjust it before mapping to the
+	 * domain.
+	 */
+	batch_add_pfn_num(batch, PHYS_PFN(dmabuf->phys.paddr + start),
+			  last_index - start_index + 1, BATCH_MMIO);
+	return 0;
+}
+
 /*
 * PFNs are stored in three places, in order of preference:
 * - The iopt_pages xarray. This is only populated if there is a
@@ -1072,7 +1129,10 @@ struct pfn_reader {
 	unsigned long batch_end_index;
 	unsigned long last_index;
 
-	struct pfn_reader_user user;
+	union {
+		struct pfn_reader_user user;
+		struct pfn_reader_dmabuf dmabuf;
+	};
 };
 
 static int pfn_reader_update_pinned(struct pfn_reader *pfns)
@@ -1108,7 +1168,7 @@ static int pfn_reader_fill_span(struct pfn_reader *pfns)
 {
 	struct interval_tree_double_span_iter *span = &pfns->span;
 	unsigned long start_index = pfns->batch_end_index;
-	struct pfn_reader_user *user = &pfns->user;
+	struct pfn_reader_user *user;
 	unsigned long npages;
 	struct iopt_area *area;
 	int rc;
@@ -1140,8 +1200,13 @@ static int pfn_reader_fill_span(struct pfn_reader *pfns)
 		return 0;
 	}
 
-	if (start_index >= pfns->user.upages_end) {
-		rc = pfn_reader_user_pin(&pfns->user, pfns->pages, start_index,
+	if (iopt_is_dmabuf(pfns->pages))
+		return pfn_reader_fill_dmabuf(&pfns->dmabuf, &pfns->batch,
+					      start_index, span->last_hole);
+
+	user = &pfns->user;
+	if (start_index >= user->upages_end) {
+		rc = pfn_reader_user_pin(user, pfns->pages, start_index,
 					 span->last_hole);
 		if (rc)
 			return rc;
@@ -1209,7 +1274,10 @@ static int pfn_reader_init(struct pfn_reader *pfns, struct iopt_pages *pages,
 	pfns->batch_start_index = start_index;
 	pfns->batch_end_index = start_index;
 	pfns->last_index = last_index;
-	pfn_reader_user_init(&pfns->user, pages);
+	if (iopt_is_dmabuf(pages))
+		pfn_reader_dmabuf_init(&pfns->dmabuf, pages);
+	else
+		pfn_reader_user_init(&pfns->user, pages);
 	rc = batch_init(&pfns->batch, last_index - start_index + 1);
 	if (rc)
 		return rc;
@@ -1230,8 +1298,12 @@ static int pfn_reader_init(struct pfn_reader *pfns, struct iopt_pages *pages,
 static void pfn_reader_release_pins(struct pfn_reader *pfns)
 {
 	struct iopt_pages *pages = pfns->pages;
-	struct pfn_reader_user *user = &pfns->user;
+	struct pfn_reader_user *user;
+
+	if (iopt_is_dmabuf(pages))
+		return;
 
+	user = &pfns->user;
 	if (user->upages_end > pfns->batch_end_index) {
 		/* Any pages not transferred to the batch are just unpinned */
@@ -1261,7 +1333,8 @@ static void pfn_reader_destroy(struct pfn_reader *pfns)
 	struct iopt_pages *pages = pfns->pages;
 
 	pfn_reader_release_pins(pfns);
-	pfn_reader_user_destroy(&pfns->user, pfns->pages);
+	if (!iopt_is_dmabuf(pfns->pages))
+		pfn_reader_user_destroy(&pfns->user, pfns->pages);
 	batch_destroy(&pfns->batch, NULL);
 	WARN_ON(pages->last_npinned != pages->npinned);
 }
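Because a dmabuf-backed area is a single contiguous phys_vec, pfn_reader_fill_dmabuf() satisfies any index range with one (pfn, run-length) batch entry and never pins anything. Tagging the whole batch BATCH_MMIO is what lets batch_to_domain() adjust the mapping attributes in one place; a batch maps with a single iommu_prot, so mixing CPU memory and MMIO in one batch would not work anyway. Roughly, the effect is:

	int iommu_prot = area->iommu_prot;	/* e.g. READ | WRITE | CACHE */

	if (batch->kind == BATCH_MMIO) {
		iommu_prot &= ~IOMMU_CACHE;	/* not coherent CPU memory */
		iommu_prot |= IOMMU_MMIO;	/* device-memory attributes */
	}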
@@ -1340,26 +1413,234 @@ struct iopt_pages *iopt_alloc_user_pages(void __user *uptr,
 	return pages;
 }
 
-struct iopt_pages *iopt_alloc_file_pages(struct file *file, unsigned long start,
+struct iopt_pages *iopt_alloc_file_pages(struct file *file,
+					 unsigned long start_byte,
+					 unsigned long start,
 					 unsigned long length, bool writable)
 {
 	struct iopt_pages *pages;
-	unsigned long start_down = ALIGN_DOWN(start, PAGE_SIZE);
-	unsigned long end;
 
-	if (length && check_add_overflow(start, length - 1, &end))
-		return ERR_PTR(-EOVERFLOW);
-
-	pages = iopt_alloc_pages(start - start_down, length, writable);
+	pages = iopt_alloc_pages(start_byte, length, writable);
 	if (IS_ERR(pages))
 		return pages;
 	pages->file = get_file(file);
-	pages->start = start_down;
+	pages->start = start - start_byte;
 	pages->type = IOPT_ADDRESS_FILE;
 	return pages;
 }
 
+static void iopt_revoke_notify(struct dma_buf_attachment *attach)
+{
+	struct iopt_pages *pages = attach->importer_priv;
+	struct iopt_pages_dmabuf_track *track;
+
+	guard(mutex)(&pages->mutex);
+	if (iopt_dmabuf_revoked(pages))
+		return;
+
+	list_for_each_entry(track, &pages->dmabuf.tracker, elm) {
+		struct iopt_area *area = track->area;
+
+		iopt_area_unmap_domain_range(area, track->domain,
+					     iopt_area_index(area),
+					     iopt_area_last_index(area));
+	}
+	pages->dmabuf.phys.len = 0;
+}
+
+static struct dma_buf_attach_ops iopt_dmabuf_attach_revoke_ops = {
+	.allow_peer2peer = true,
+	.move_notify = iopt_revoke_notify,
+};
+
+/*
+ * iommufd and vfio have a circular dependency. Future work for a phys
+ * based private interconnect will remove this.
+ */
+static int
+sym_vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
+				 struct dma_buf_phys_vec *phys)
+{
+	typeof(&vfio_pci_dma_buf_iommufd_map) fn;
+	int rc;
+
+	rc = iommufd_test_dma_buf_iommufd_map(attachment, phys);
+	if (rc != -EOPNOTSUPP)
+		return rc;
+
+	if (!IS_ENABLED(CONFIG_VFIO_PCI_DMABUF))
+		return -EOPNOTSUPP;
+
+	fn = symbol_get(vfio_pci_dma_buf_iommufd_map);
+	if (!fn)
+		return -EOPNOTSUPP;
+	rc = fn(attachment, phys);
+	symbol_put(vfio_pci_dma_buf_iommufd_map);
+	return rc;
+}
+
+static int iopt_map_dmabuf(struct iommufd_ctx *ictx, struct iopt_pages *pages,
+			   struct dma_buf *dmabuf)
+{
+	struct dma_buf_attachment *attach;
+	int rc;
+
+	attach = dma_buf_dynamic_attach(dmabuf, iommufd_global_device(),
+					&iopt_dmabuf_attach_revoke_ops, pages);
+	if (IS_ERR(attach))
+		return PTR_ERR(attach);
+
+	dma_resv_lock(dmabuf->resv, NULL);
+
+	/*
+	 * Lock ordering requires the mutex to be taken inside the
+	 * reservation, make sure lockdep sees this.
+	 */
+	if (IS_ENABLED(CONFIG_LOCKDEP)) {
+		mutex_lock(&pages->mutex);
+		mutex_unlock(&pages->mutex);
+	}
+
+	rc = sym_vfio_pci_dma_buf_iommufd_map(attach, &pages->dmabuf.phys);
+	if (rc)
+		goto err_detach;
+
+	dma_resv_unlock(dmabuf->resv);
+
+	/* On success iopt_release_pages() will detach and put the dmabuf. */
+	pages->dmabuf.attach = attach;
+	return 0;
+
+err_detach:
+	dma_resv_unlock(dmabuf->resv);
+	dma_buf_detach(dmabuf, attach);
+	return rc;
+}
+
+struct iopt_pages *iopt_alloc_dmabuf_pages(struct iommufd_ctx *ictx,
+					   struct dma_buf *dmabuf,
+					   unsigned long start_byte,
+					   unsigned long start,
+					   unsigned long length, bool writable)
+{
+	static struct lock_class_key pages_dmabuf_mutex_key;
+	struct iopt_pages *pages;
+	int rc;
+
+	if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (dmabuf->size <= (start + length - 1) ||
+	    length / PAGE_SIZE >= MAX_NPFNS)
+		return ERR_PTR(-EINVAL);
+
+	pages = iopt_alloc_pages(start_byte, length, writable);
+	if (IS_ERR(pages))
+		return pages;
+
+	/*
+	 * The mmap_lock can be held when obtaining the dmabuf reservation
+	 * lock which creates a locking cycle with the pages mutex which is
+	 * held while obtaining the mmap_lock. This locking path is not
+	 * present for IOPT_ADDRESS_DMABUF so split the lock class.
+	 */
+	lockdep_set_class(&pages->mutex, &pages_dmabuf_mutex_key);
+
+	/* dmabuf does not use pinned page accounting. */
+	pages->account_mode = IOPT_PAGES_ACCOUNT_NONE;
+	pages->type = IOPT_ADDRESS_DMABUF;
+	pages->dmabuf.start = start - start_byte;
+	INIT_LIST_HEAD(&pages->dmabuf.tracker);
+
+	rc = iopt_map_dmabuf(ictx, pages, dmabuf);
+	if (rc) {
+		iopt_put_pages(pages);
+		return ERR_PTR(rc);
+	}
+
+	return pages;
+}
+
+int iopt_dmabuf_track_domain(struct iopt_pages *pages, struct iopt_area *area,
+			     struct iommu_domain *domain)
+{
+	struct iopt_pages_dmabuf_track *track;
+
+	lockdep_assert_held(&pages->mutex);
+	if (WARN_ON(!iopt_is_dmabuf(pages)))
+		return -EINVAL;
+
+	list_for_each_entry(track, &pages->dmabuf.tracker, elm)
+		if (WARN_ON(track->domain == domain && track->area == area))
+			return -EINVAL;
+
+	track = kzalloc(sizeof(*track), GFP_KERNEL);
+	if (!track)
+		return -ENOMEM;
+	track->domain = domain;
+	track->area = area;
+	list_add_tail(&track->elm, &pages->dmabuf.tracker);
+
+	return 0;
+}
+
+void iopt_dmabuf_untrack_domain(struct iopt_pages *pages,
+				struct iopt_area *area,
+				struct iommu_domain *domain)
+{
+	struct iopt_pages_dmabuf_track *track;
+
+	lockdep_assert_held(&pages->mutex);
+	WARN_ON(!iopt_is_dmabuf(pages));
+
+	list_for_each_entry(track, &pages->dmabuf.tracker, elm) {
+		if (track->domain == domain && track->area == area) {
+			list_del(&track->elm);
+			kfree(track);
+			return;
+		}
+	}
+	WARN_ON(true);
+}
+
+int iopt_dmabuf_track_all_domains(struct iopt_area *area,
+				  struct iopt_pages *pages)
+{
+	struct iopt_pages_dmabuf_track *track;
+	struct iommu_domain *domain;
+	unsigned long index;
+	int rc;
+
+	list_for_each_entry(track, &pages->dmabuf.tracker, elm)
+		if (WARN_ON(track->area == area))
+			return -EINVAL;
+
+	xa_for_each(&area->iopt->domains, index, domain) {
+		rc = iopt_dmabuf_track_domain(pages, area, domain);
+		if (rc)
+			goto err_untrack;
+	}
+	return 0;
+err_untrack:
+	iopt_dmabuf_untrack_all_domains(area, pages);
+	return rc;
+}
+
+void iopt_dmabuf_untrack_all_domains(struct iopt_area *area,
+				     struct iopt_pages *pages)
+{
+	struct iopt_pages_dmabuf_track *track;
+	struct iopt_pages_dmabuf_track *tmp;
+
+	list_for_each_entry_safe(track, tmp, &pages->dmabuf.tracker, elm) {
+		if (track->area == area) {
+			list_del(&track->elm);
+			kfree(track);
+		}
+	}
+}
+
 void iopt_release_pages(struct kref *kref)
 {
 	struct iopt_pages *pages = container_of(kref, struct iopt_pages, kref);
@@ -1372,8 +1653,15 @@ void iopt_release_pages(struct kref *kref)
 	mutex_destroy(&pages->mutex);
 	put_task_struct(pages->source_task);
 	free_uid(pages->source_user);
-	if (pages->type == IOPT_ADDRESS_FILE)
+	if (iopt_is_dmabuf(pages) && pages->dmabuf.attach) {
+		struct dma_buf *dmabuf = pages->dmabuf.attach->dmabuf;
+
+		dma_buf_detach(dmabuf, pages->dmabuf.attach);
+		dma_buf_put(dmabuf);
+		WARN_ON(!list_empty(&pages->dmabuf.tracker));
+	} else if (pages->type == IOPT_ADDRESS_FILE) {
 		fput(pages->file);
+	}
 	kfree(pages);
 }
 
@@ -1451,6 +1739,14 @@ static void __iopt_area_unfill_domain(struct iopt_area *area,
 
 	lockdep_assert_held(&pages->mutex);
 
+	if (iopt_is_dmabuf(pages)) {
+		if (WARN_ON(iopt_dmabuf_revoked(pages)))
+			return;
+		iopt_area_unmap_domain_range(area, domain, start_index,
+					     last_index);
+		return;
+	}
+
 	/*
	 * For security we must not unpin something that is still DMA mapped,
	 * so this must unmap any IOVA before we go ahead and unpin the pages.
@@ -1526,6 +1822,9 @@ void iopt_area_unmap_domain(struct iopt_area *area, struct iommu_domain *domain)
 void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages,
 			     struct iommu_domain *domain)
 {
+	if (iopt_dmabuf_revoked(pages))
+		return;
+
 	__iopt_area_unfill_domain(area, pages, domain,
 				  iopt_area_last_index(area));
 }
@@ -1546,6 +1845,9 @@ int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain)
 
 	lockdep_assert_held(&area->pages->mutex);
 
+	if (iopt_dmabuf_revoked(area->pages))
+		return 0;
+
 	rc = pfn_reader_first(&pfns, area->pages, iopt_area_index(area),
 			      iopt_area_last_index(area));
 	if (rc)
@@ -1605,33 +1907,44 @@ int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages)
 		return 0;
 
 	mutex_lock(&pages->mutex);
-	rc = pfn_reader_first(&pfns, pages, iopt_area_index(area),
-			      iopt_area_last_index(area));
-	if (rc)
-		goto out_unlock;
+	if (iopt_is_dmabuf(pages)) {
+		rc = iopt_dmabuf_track_all_domains(area, pages);
+		if (rc)
+			goto out_unlock;
+	}
 
-	while (!pfn_reader_done(&pfns)) {
-		done_first_end_index = pfns.batch_end_index;
-		done_all_end_index = pfns.batch_start_index;
-		xa_for_each(&area->iopt->domains, index, domain) {
-			rc = batch_to_domain(&pfns.batch, domain, area,
-					     pfns.batch_start_index);
+	if (!iopt_dmabuf_revoked(pages)) {
+		rc = pfn_reader_first(&pfns, pages, iopt_area_index(area),
+				      iopt_area_last_index(area));
+		if (rc)
+			goto out_untrack;
+
+		while (!pfn_reader_done(&pfns)) {
+			done_first_end_index = pfns.batch_end_index;
+			done_all_end_index = pfns.batch_start_index;
+			xa_for_each(&area->iopt->domains, index, domain) {
+				rc = batch_to_domain(&pfns.batch, domain, area,
+						     pfns.batch_start_index);
+				if (rc)
+					goto out_unmap;
+			}
+			done_all_end_index = done_first_end_index;
+
+			rc = pfn_reader_next(&pfns);
 			if (rc)
 				goto out_unmap;
 		}
-		done_all_end_index = done_first_end_index;
-
-		rc = pfn_reader_next(&pfns);
+		rc = pfn_reader_update_pinned(&pfns);
 		if (rc)
 			goto out_unmap;
+
+		pfn_reader_destroy(&pfns);
 	}
-	rc = pfn_reader_update_pinned(&pfns);
-	if (rc)
-		goto out_unmap;
 
 	area->storage_domain = xa_load(&area->iopt->domains, 0);
 	interval_tree_insert(&area->pages_node, &pages->domains_itree);
-	goto out_destroy;
+	mutex_unlock(&pages->mutex);
+	return 0;
 
 out_unmap:
 	pfn_reader_release_pins(&pfns);
@@ -1658,8 +1971,10 @@ out_unmap:
 					end_index);
 		}
 	}
-out_destroy:
 	pfn_reader_destroy(&pfns);
+out_untrack:
+	if (iopt_is_dmabuf(pages))
+		iopt_dmabuf_untrack_all_domains(area, pages);
 out_unlock:
 	mutex_unlock(&pages->mutex);
 	return rc;
@@ -1685,16 +2000,22 @@ void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages)
 	if (!area->storage_domain)
 		goto out_unlock;
 
-	xa_for_each(&iopt->domains, index, domain)
-		if (domain != area->storage_domain)
+	xa_for_each(&iopt->domains, index, domain) {
+		if (domain == area->storage_domain)
+			continue;
+
+		if (!iopt_dmabuf_revoked(pages))
 			iopt_area_unmap_domain_range(
 				area, domain, iopt_area_index(area),
 				iopt_area_last_index(area));
+	}
 
 	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
 		WARN_ON(RB_EMPTY_NODE(&area->pages_node.rb));
 	interval_tree_remove(&area->pages_node, &pages->domains_itree);
 	iopt_area_unfill_domain(area, pages, area->storage_domain);
+	if (iopt_is_dmabuf(pages))
+		iopt_dmabuf_untrack_all_domains(area, pages);
 	area->storage_domain = NULL;
 out_unlock:
 	mutex_unlock(&pages->mutex);
@@ -2031,15 +2352,14 @@ int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte,
 	if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable)
 		return -EPERM;
 
-	if (pages->type == IOPT_ADDRESS_FILE)
+	if (iopt_is_dmabuf(pages))
+		return -EINVAL;
+
+	if (pages->type != IOPT_ADDRESS_USER)
 		return iopt_pages_rw_slow(pages, start_index, last_index,
 					  start_byte % PAGE_SIZE, data, length,
 					  flags);
 
-	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
-	    WARN_ON(pages->type != IOPT_ADDRESS_USER))
-		return -EINVAL;
-
 	if (!(flags & IOMMUFD_ACCESS_RW_KTHREAD) && change_mm) {
 		if (start_index == last_index)
 			return iopt_pages_rw_page(pages, start_index,
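Pulling the pages.c pieces together: revocation is driven entirely by the exporter through move_notify, while the IOVA reservation and the iopt_area stay intact. A sketch of the sequence (the exporter side matches the selftest further down):

	/* exporter, under the dmabuf reservation lock */
	dma_resv_lock(dmabuf->resv, NULL);
	priv->revoked = true;
	dma_buf_move_notify(dmabuf);	/* -> iopt_revoke_notify() */
	dma_resv_unlock(dmabuf->resv);

	/*
	 * importer: iopt_revoke_notify() walks pages->dmabuf.tracker,
	 * unmaps every tracked (area, domain) pair and zeroes phys.len,
	 * after which iopt_dmabuf_revoked() turns the later fill and
	 * unfill paths into no-ops.
	 */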
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index de178827a078..c4322fd26f93 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -5,6 +5,8 @@
 */
 #include <linux/anon_inodes.h>
 #include <linux/debugfs.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
 #include <linux/fault-inject.h>
 #include <linux/file.h>
 #include <linux/iommu.h>
@@ -12,6 +14,8 @@
 #include <linux/slab.h>
 #include <linux/xarray.h>
 #include <uapi/linux/iommufd.h>
+#include <linux/generic_pt/iommu.h>
+#include "../iommu-pages.h"
 
 #include "../iommu-priv.h"
 #include "io_pagetable.h"
@@ -41,21 +45,6 @@ static DEFINE_IDA(mock_dev_ida);
 
 enum {
 	MOCK_DIRTY_TRACK = 1,
-	MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2,
-	MOCK_HUGE_PAGE_SIZE = 512 * MOCK_IO_PAGE_SIZE,
-
-	/*
-	 * Like a real page table alignment requires the low bits of the address
-	 * to be zero. xarray also requires the high bit to be zero, so we store
-	 * the pfns shifted. The upper bits are used for metadata.
-	 */
-	MOCK_PFN_MASK = ULONG_MAX / MOCK_IO_PAGE_SIZE,
-
-	_MOCK_PFN_START = MOCK_PFN_MASK + 1,
-	MOCK_PFN_START_IOVA = _MOCK_PFN_START,
-	MOCK_PFN_LAST_IOVA = _MOCK_PFN_START,
-	MOCK_PFN_DIRTY_IOVA = _MOCK_PFN_START << 1,
-	MOCK_PFN_HUGE_IOVA = _MOCK_PFN_START << 2,
 };
 
 static int mock_dev_enable_iopf(struct device *dev, struct iommu_domain *domain);
@@ -124,10 +113,15 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
 }
 
 struct mock_iommu_domain {
+	union {
+		struct iommu_domain domain;
+		struct pt_iommu iommu;
+		struct pt_iommu_amdv1 amdv1;
+	};
 	unsigned long flags;
-	struct iommu_domain domain;
-	struct xarray pfns;
 };
+PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, iommu, domain);
+PT_IOMMU_CHECK_DOMAIN(struct mock_iommu_domain, amdv1.iommu, domain);
 
 static inline struct mock_iommu_domain *
 to_mock_domain(struct iommu_domain *domain)
@@ -216,7 +210,7 @@ static inline struct selftest_obj *to_selftest_obj(struct iommufd_object *obj)
 }
 
 static int mock_domain_nop_attach(struct iommu_domain *domain,
-				  struct device *dev)
+				  struct device *dev, struct iommu_domain *old)
 {
 	struct mock_dev *mdev = to_mock_dev(dev);
 	struct mock_viommu *new_viommu = NULL;
@@ -344,74 +338,6 @@ static int mock_domain_set_dirty_tracking(struct iommu_domain *domain,
 	return 0;
 }
 
-static bool mock_test_and_clear_dirty(struct mock_iommu_domain *mock,
-				      unsigned long iova, size_t page_size,
-				      unsigned long flags)
-{
-	unsigned long cur, end = iova + page_size - 1;
-	bool dirty = false;
-	void *ent, *old;
-
-	for (cur = iova; cur < end; cur += MOCK_IO_PAGE_SIZE) {
-		ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE);
-		if (!ent || !(xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA))
-			continue;
-
-		dirty = true;
-		/* Clear dirty */
-		if (!(flags & IOMMU_DIRTY_NO_CLEAR)) {
-			unsigned long val;
-
-			val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA;
-			old = xa_store(&mock->pfns, cur / MOCK_IO_PAGE_SIZE,
-				       xa_mk_value(val), GFP_KERNEL);
-			WARN_ON_ONCE(ent != old);
-		}
-	}
-
-	return dirty;
-}
-
-static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain,
-					    unsigned long iova, size_t size,
-					    unsigned long flags,
-					    struct iommu_dirty_bitmap *dirty)
-{
-	struct mock_iommu_domain *mock = to_mock_domain(domain);
-	unsigned long end = iova + size;
-	void *ent;
-
-	if (!(mock->flags & MOCK_DIRTY_TRACK) && dirty->bitmap)
-		return -EINVAL;
-
-	do {
-		unsigned long pgsize = MOCK_IO_PAGE_SIZE;
-		unsigned long head;
-
-		ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
-		if (!ent) {
-			iova += pgsize;
-			continue;
-		}
-
-		if (xa_to_value(ent) & MOCK_PFN_HUGE_IOVA)
-			pgsize = MOCK_HUGE_PAGE_SIZE;
-		head = iova & ~(pgsize - 1);
-
-		/* Clear dirty */
-		if (mock_test_and_clear_dirty(mock, head, pgsize, flags))
-			iommu_dirty_bitmap_record(dirty, iova, pgsize);
-		iova += pgsize;
-	} while (iova < end);
-
-	return 0;
-}
-
-static const struct iommu_dirty_ops dirty_ops = {
-	.set_dirty_tracking = mock_domain_set_dirty_tracking,
-	.read_and_clear_dirty = mock_domain_read_and_clear_dirty,
-};
-
 static struct mock_iommu_domain_nested *
 __mock_domain_alloc_nested(const struct iommu_user_data *user_data)
 {
@@ -446,7 +372,7 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
 	if (flags & ~IOMMU_HWPT_ALLOC_PASID)
 		return ERR_PTR(-EOPNOTSUPP);
 
-	if (!parent || parent->ops != mock_ops.default_domain_ops)
+	if (!parent || !(parent->type & __IOMMU_DOMAIN_PAGING))
 		return ERR_PTR(-EINVAL);
 
 	mock_parent = to_mock_domain(parent);
@@ -459,159 +385,170 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
 	return &mock_nested->domain;
 }
 
-static struct iommu_domain *
-mock_domain_alloc_paging_flags(struct device *dev, u32 flags,
-			       const struct iommu_user_data *user_data)
-{
-	bool has_dirty_flag = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
-	const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
-				 IOMMU_HWPT_ALLOC_NEST_PARENT |
-				 IOMMU_HWPT_ALLOC_PASID;
-	struct mock_dev *mdev = to_mock_dev(dev);
-	bool no_dirty_ops = mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY;
-	struct mock_iommu_domain *mock;
-
-	if (user_data)
-		return ERR_PTR(-EOPNOTSUPP);
-	if ((flags & ~PAGING_FLAGS) || (has_dirty_flag && no_dirty_ops))
-		return ERR_PTR(-EOPNOTSUPP);
-
-	mock = kzalloc(sizeof(*mock), GFP_KERNEL);
-	if (!mock)
-		return ERR_PTR(-ENOMEM);
-	mock->domain.geometry.aperture_start = MOCK_APERTURE_START;
-	mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST;
-	mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE;
-	if (dev && mdev->flags & MOCK_FLAGS_DEVICE_HUGE_IOVA)
-		mock->domain.pgsize_bitmap |= MOCK_HUGE_PAGE_SIZE;
-	mock->domain.ops = mock_ops.default_domain_ops;
-	mock->domain.type = IOMMU_DOMAIN_UNMANAGED;
-	xa_init(&mock->pfns);
-
-	if (has_dirty_flag)
-		mock->domain.dirty_ops = &dirty_ops;
-	return &mock->domain;
-}
-
 static void mock_domain_free(struct iommu_domain *domain)
 {
 	struct mock_iommu_domain *mock = to_mock_domain(domain);
 
-	WARN_ON(!xa_empty(&mock->pfns));
+	pt_iommu_deinit(&mock->iommu);
 	kfree(mock);
 }
 
-static int mock_domain_map_pages(struct iommu_domain *domain,
-				 unsigned long iova, phys_addr_t paddr,
-				 size_t pgsize, size_t pgcount, int prot,
-				 gfp_t gfp, size_t *mapped)
+static void mock_iotlb_sync(struct iommu_domain *domain,
+			    struct iommu_iotlb_gather *gather)
 {
-	struct mock_iommu_domain *mock = to_mock_domain(domain);
-	unsigned long flags = MOCK_PFN_START_IOVA;
-	unsigned long start_iova = iova;
+	iommu_put_pages_list(&gather->freelist);
+}
 
-	/*
-	 * xarray does not reliably work with fault injection because it does a
-	 * retry allocation, so put our own failure point.
-	 */
-	if (iommufd_should_fail())
-		return -ENOENT;
+static const struct iommu_domain_ops amdv1_mock_ops = {
+	IOMMU_PT_DOMAIN_OPS(amdv1_mock),
+	.free = mock_domain_free,
+	.attach_dev = mock_domain_nop_attach,
+	.set_dev_pasid = mock_domain_set_dev_pasid_nop,
+	.iotlb_sync = &mock_iotlb_sync,
+};
 
-	WARN_ON(iova % MOCK_IO_PAGE_SIZE);
-	WARN_ON(pgsize % MOCK_IO_PAGE_SIZE);
-	for (; pgcount; pgcount--) {
-		size_t cur;
+static const struct iommu_domain_ops amdv1_mock_huge_ops = {
+	IOMMU_PT_DOMAIN_OPS(amdv1_mock),
+	.free = mock_domain_free,
+	.attach_dev = mock_domain_nop_attach,
+	.set_dev_pasid = mock_domain_set_dev_pasid_nop,
+	.iotlb_sync = &mock_iotlb_sync,
+};
+#undef pt_iommu_amdv1_mock_map_pages
 
-		for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) {
-			void *old;
+static const struct iommu_dirty_ops amdv1_mock_dirty_ops = {
+	IOMMU_PT_DIRTY_OPS(amdv1_mock),
+	.set_dirty_tracking = mock_domain_set_dirty_tracking,
+};
 
-			if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize)
-				flags = MOCK_PFN_LAST_IOVA;
-			if (pgsize != MOCK_IO_PAGE_SIZE) {
-				flags |= MOCK_PFN_HUGE_IOVA;
-			}
-			old = xa_store(&mock->pfns, iova / MOCK_IO_PAGE_SIZE,
-				       xa_mk_value((paddr / MOCK_IO_PAGE_SIZE) |
-						   flags),
-				       gfp);
-			if (xa_is_err(old)) {
-				for (; start_iova != iova;
-				     start_iova += MOCK_IO_PAGE_SIZE)
-					xa_erase(&mock->pfns,
-						 start_iova /
-							 MOCK_IO_PAGE_SIZE);
-				return xa_err(old);
-			}
-			WARN_ON(old);
-			iova += MOCK_IO_PAGE_SIZE;
-			paddr += MOCK_IO_PAGE_SIZE;
-			*mapped += MOCK_IO_PAGE_SIZE;
-			flags = 0;
-		}
-	}
-	return 0;
-}
+static const struct iommu_domain_ops amdv1_ops = {
+	IOMMU_PT_DOMAIN_OPS(amdv1),
+	.free = mock_domain_free,
+	.attach_dev = mock_domain_nop_attach,
+	.set_dev_pasid = mock_domain_set_dev_pasid_nop,
+	.iotlb_sync = &mock_iotlb_sync,
+};
 
-static size_t mock_domain_unmap_pages(struct iommu_domain *domain,
-				      unsigned long iova, size_t pgsize,
-				      size_t pgcount,
-				      struct iommu_iotlb_gather *iotlb_gather)
+static const struct iommu_dirty_ops amdv1_dirty_ops = {
+	IOMMU_PT_DIRTY_OPS(amdv1),
+	.set_dirty_tracking = mock_domain_set_dirty_tracking,
+};
+
+static struct mock_iommu_domain *
+mock_domain_alloc_pgtable(struct device *dev,
+			  const struct iommu_hwpt_selftest *user_cfg, u32 flags)
 {
-	struct mock_iommu_domain *mock = to_mock_domain(domain);
-	bool first = true;
-	size_t ret = 0;
-	void *ent;
+	struct mock_iommu_domain *mock;
+	int rc;
 
-	WARN_ON(iova % MOCK_IO_PAGE_SIZE);
-	WARN_ON(pgsize % MOCK_IO_PAGE_SIZE);
+	mock = kzalloc(sizeof(*mock), GFP_KERNEL);
+	if (!mock)
+		return ERR_PTR(-ENOMEM);
+	mock->domain.type = IOMMU_DOMAIN_UNMANAGED;
+	mock->amdv1.iommu.nid = NUMA_NO_NODE;
 
-	for (; pgcount; pgcount--) {
-		size_t cur;
+	switch (user_cfg->pagetable_type) {
+	case MOCK_IOMMUPT_DEFAULT:
+	case MOCK_IOMMUPT_HUGE: {
+		struct pt_iommu_amdv1_cfg cfg = {};
 
-		for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) {
-			ent = xa_erase(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
+		/* The mock version has a 2k page size */
+		cfg.common.hw_max_vasz_lg2 = 56;
+		cfg.common.hw_max_oasz_lg2 = 51;
+		cfg.starting_level = 2;
+		if (user_cfg->pagetable_type == MOCK_IOMMUPT_HUGE)
+			mock->domain.ops = &amdv1_mock_huge_ops;
+		else
+			mock->domain.ops = &amdv1_mock_ops;
+		rc = pt_iommu_amdv1_mock_init(&mock->amdv1, &cfg, GFP_KERNEL);
+		if (rc)
+			goto err_free;
 
-			/*
-			 * iommufd generates unmaps that must be a strict
-			 * superset of the map's performend So every
-			 * starting/ending IOVA should have been an iova passed
-			 * to map.
-			 *
-			 * This simple logic doesn't work when the HUGE_PAGE is
-			 * turned on since the core code will automatically
-			 * switch between the two page sizes creating a break in
-			 * the unmap calls. The break can land in the middle of
-			 * contiguous IOVA.
-			 */
-			if (!(domain->pgsize_bitmap & MOCK_HUGE_PAGE_SIZE)) {
-				if (first) {
-					WARN_ON(ent && !(xa_to_value(ent) &
-							 MOCK_PFN_START_IOVA));
-					first = false;
-				}
-				if (pgcount == 1 &&
-				    cur + MOCK_IO_PAGE_SIZE == pgsize)
-					WARN_ON(ent && !(xa_to_value(ent) &
							 MOCK_PFN_LAST_IOVA));
-			}
+		/*
+		 * In huge mode userspace should only provide huge pages, we
+		 * have to include PAGE_SIZE for the domain to be accepted by
+		 * iommufd.
+		 */
+		if (user_cfg->pagetable_type == MOCK_IOMMUPT_HUGE)
+			mock->domain.pgsize_bitmap = MOCK_HUGE_PAGE_SIZE |
+						     PAGE_SIZE;
+		if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
+			mock->domain.dirty_ops = &amdv1_mock_dirty_ops;
+		break;
+	}
+	case MOCK_IOMMUPT_AMDV1: {
+		struct pt_iommu_amdv1_cfg cfg = {};
 
-			iova += MOCK_IO_PAGE_SIZE;
-			ret += MOCK_IO_PAGE_SIZE;
-		}
+		cfg.common.hw_max_vasz_lg2 = 64;
+		cfg.common.hw_max_oasz_lg2 = 52;
+		cfg.common.features = BIT(PT_FEAT_DYNAMIC_TOP) |
+				      BIT(PT_FEAT_AMDV1_ENCRYPT_TABLES) |
+				      BIT(PT_FEAT_AMDV1_FORCE_COHERENCE);
+		cfg.starting_level = 2;
+		mock->domain.ops = &amdv1_ops;
+		rc = pt_iommu_amdv1_init(&mock->amdv1, &cfg, GFP_KERNEL);
+		if (rc)
+			goto err_free;
+		if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
+			mock->domain.dirty_ops = &amdv1_dirty_ops;
+		break;
+	}
+	default:
+		rc = -EOPNOTSUPP;
+		goto err_free;
 	}
-	return ret;
-}
 
-static phys_addr_t mock_domain_iova_to_phys(struct iommu_domain *domain,
-					    dma_addr_t iova)
-{
-	struct mock_iommu_domain *mock = to_mock_domain(domain);
-	void *ent;
+	/*
+	 * Override the real aperture to the MOCK aperture for test purposes.
+	 */
+	if (user_cfg->pagetable_type == MOCK_IOMMUPT_DEFAULT) {
+		WARN_ON(mock->domain.geometry.aperture_start != 0);
+		WARN_ON(mock->domain.geometry.aperture_end < MOCK_APERTURE_LAST);
+		mock->domain.geometry.aperture_start = MOCK_APERTURE_START;
+		mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST;
+	}
 
-	WARN_ON(iova % MOCK_IO_PAGE_SIZE);
-	ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
-	WARN_ON(!ent);
-	return (xa_to_value(ent) & MOCK_PFN_MASK) * MOCK_IO_PAGE_SIZE;
+	return mock;
+err_free:
+	kfree(mock);
+	return ERR_PTR(rc);
+}
+
+static struct iommu_domain *
+mock_domain_alloc_paging_flags(struct device *dev, u32 flags,
+			       const struct iommu_user_data *user_data)
+{
+	bool has_dirty_flag = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+	const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
+				 IOMMU_HWPT_ALLOC_NEST_PARENT |
+				 IOMMU_HWPT_ALLOC_PASID;
+	struct mock_dev *mdev = to_mock_dev(dev);
+	bool no_dirty_ops = mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY;
+	struct iommu_hwpt_selftest user_cfg = {};
+	struct mock_iommu_domain *mock;
+	int rc;
+
+	if ((flags & ~PAGING_FLAGS) || (has_dirty_flag && no_dirty_ops))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (user_data && (user_data->type != IOMMU_HWPT_DATA_SELFTEST &&
+			  user_data->type != IOMMU_HWPT_DATA_NONE))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (user_data) {
+		rc = iommu_copy_struct_from_user(
+			&user_cfg, user_data, IOMMU_HWPT_DATA_SELFTEST, iotlb);
+		if (rc)
+			return ERR_PTR(rc);
+	}
+
+	mock = mock_domain_alloc_pgtable(dev, &user_cfg, flags);
+	if (IS_ERR(mock))
+		return ERR_CAST(mock);
+	return &mock->domain;
 }
 static bool mock_domain_capable(struct device *dev, enum iommu_cap cap)
@@ -955,15 +892,6 @@ static const struct iommu_ops mock_ops = {
 	.user_pasid_table = true,
 	.get_viommu_size = mock_get_viommu_size,
 	.viommu_init = mock_viommu_init,
-	.default_domain_ops =
-		&(struct iommu_domain_ops){
-			.free = mock_domain_free,
-			.attach_dev = mock_domain_nop_attach,
-			.map_pages = mock_domain_map_pages,
-			.unmap_pages = mock_domain_unmap_pages,
-			.iova_to_phys = mock_domain_iova_to_phys,
-			.set_dev_pasid = mock_domain_set_dev_pasid_nop,
-		},
 };
 
 static void mock_domain_free_nested(struct iommu_domain *domain)
@@ -1047,7 +975,7 @@ get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id,
 	if (IS_ERR(hwpt))
 		return hwpt;
 	if (hwpt->domain->type != IOMMU_DOMAIN_UNMANAGED ||
-	    hwpt->domain->ops != mock_ops.default_domain_ops) {
+	    hwpt->domain->owner != &mock_ops) {
 		iommufd_put_object(ucmd->ictx, &hwpt->obj);
 		return ERR_PTR(-EINVAL);
 	}
@@ -1088,7 +1016,6 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags)
 		{},
 	};
 	const u32 valid_flags = MOCK_FLAGS_DEVICE_NO_DIRTY |
-				MOCK_FLAGS_DEVICE_HUGE_IOVA |
 				MOCK_FLAGS_DEVICE_PASID;
 	struct mock_dev *mdev;
 	int rc, i;
@@ -1277,23 +1204,25 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd,
 	struct iommufd_hw_pagetable *hwpt;
 	struct mock_iommu_domain *mock;
+	unsigned int page_size;
 	uintptr_t end;
 	int rc;
 
-	if (iova % MOCK_IO_PAGE_SIZE || length % MOCK_IO_PAGE_SIZE ||
-	    (uintptr_t)uptr % MOCK_IO_PAGE_SIZE ||
-	    check_add_overflow((uintptr_t)uptr, (uintptr_t)length, &end))
-		return -EINVAL;
-
 	hwpt = get_md_pagetable(ucmd, mockpt_id, &mock);
 	if (IS_ERR(hwpt))
 		return PTR_ERR(hwpt);
 
-	for (; length; length -= MOCK_IO_PAGE_SIZE) {
+	page_size = 1 << __ffs(mock->domain.pgsize_bitmap);
+	if (iova % page_size || length % page_size ||
+	    (uintptr_t)uptr % page_size ||
+	    check_add_overflow((uintptr_t)uptr, (uintptr_t)length, &end))
+		return -EINVAL;
+
+	for (; length; length -= page_size) {
 		struct page *pages[1];
+		phys_addr_t io_phys;
 		unsigned long pfn;
 		long npages;
-		void *ent;
 
 		npages = get_user_pages_fast((uintptr_t)uptr & PAGE_MASK, 1, 0,
 					     pages);
@@ -1308,15 +1237,14 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd,
 		pfn = page_to_pfn(pages[0]);
 		put_page(pages[0]);
 
-		ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
-		if (!ent ||
-		    (xa_to_value(ent) & MOCK_PFN_MASK) * MOCK_IO_PAGE_SIZE !=
-			    pfn * PAGE_SIZE + ((uintptr_t)uptr % PAGE_SIZE)) {
+		io_phys = mock->domain.ops->iova_to_phys(&mock->domain, iova);
+		if (io_phys !=
+		    pfn * PAGE_SIZE + ((uintptr_t)uptr % PAGE_SIZE)) {
 			rc = -EINVAL;
 			goto out_put;
 		}
-		iova += MOCK_IO_PAGE_SIZE;
-		uptr += MOCK_IO_PAGE_SIZE;
+		iova += page_size;
+		uptr += page_size;
 	}
 	rc = 0;
 
@@ -1795,7 +1723,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
 	if (IS_ERR(hwpt))
 		return PTR_ERR(hwpt);
 
-	if (!(mock->flags & MOCK_DIRTY_TRACK)) {
+	if (!(mock->flags & MOCK_DIRTY_TRACK) || !mock->iommu.ops->set_dirty) {
 		rc = -EINVAL;
 		goto out_put;
 	}
@@ -1814,22 +1742,10 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
 	}
 
 	for (i = 0; i < max; i++) {
-		unsigned long cur = iova + i * page_size;
-		void *ent, *old;
-
 		if (!test_bit(i, (unsigned long *)tmp))
 			continue;
-
-		ent = xa_load(&mock->pfns, cur / page_size);
-		if (ent) {
-			unsigned long val;
-
-			val = xa_to_value(ent) | MOCK_PFN_DIRTY_IOVA;
-			old = xa_store(&mock->pfns, cur / page_size,
-				       xa_mk_value(val), GFP_KERNEL);
-			WARN_ON_ONCE(ent != old);
-			count++;
-		}
+		mock->iommu.ops->set_dirty(&mock->iommu, iova + i * page_size);
+		count++;
 	}
 
 	cmd->dirty.out_nr_dirty = count;
@@ -2031,6 +1947,140 @@ void iommufd_selftest_destroy(struct iommufd_object *obj)
 	}
 }
 
+struct iommufd_test_dma_buf {
+	void *memory;
+	size_t length;
+	bool revoked;
+};
+
+static int iommufd_test_dma_buf_attach(struct dma_buf *dmabuf,
+				       struct dma_buf_attachment *attachment)
+{
+	return 0;
+}
+
+static void iommufd_test_dma_buf_detach(struct dma_buf *dmabuf,
+					struct dma_buf_attachment *attachment)
+{
+}
+
+static struct sg_table *
+iommufd_test_dma_buf_map(struct dma_buf_attachment *attachment,
+			 enum dma_data_direction dir)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static void iommufd_test_dma_buf_unmap(struct dma_buf_attachment *attachment,
+				       struct sg_table *sgt,
+				       enum dma_data_direction dir)
+{
+}
+
+static void iommufd_test_dma_buf_release(struct dma_buf *dmabuf)
+{
+	struct iommufd_test_dma_buf *priv = dmabuf->priv;
+
+	kfree(priv->memory);
+	kfree(priv);
+}
+
+static const struct dma_buf_ops iommufd_test_dmabuf_ops = {
+	.attach = iommufd_test_dma_buf_attach,
+	.detach = iommufd_test_dma_buf_detach,
+	.map_dma_buf = iommufd_test_dma_buf_map,
+	.release = iommufd_test_dma_buf_release,
+	.unmap_dma_buf = iommufd_test_dma_buf_unmap,
+};
+
+int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
+				     struct dma_buf_phys_vec *phys)
+{
+	struct iommufd_test_dma_buf *priv = attachment->dmabuf->priv;
+
+	dma_resv_assert_held(attachment->dmabuf->resv);
+
+	if (attachment->dmabuf->ops != &iommufd_test_dmabuf_ops)
+		return -EOPNOTSUPP;
+
+	if (priv->revoked)
+		return -ENODEV;
+
+	phys->paddr = virt_to_phys(priv->memory);
+	phys->len = priv->length;
+	return 0;
+}
+
+static int iommufd_test_dmabuf_get(struct iommufd_ucmd *ucmd,
+				   unsigned int open_flags,
+				   size_t len)
+{
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct iommufd_test_dma_buf *priv;
+	struct dma_buf *dmabuf;
+	int rc;
+
+	len = ALIGN(len, PAGE_SIZE);
+	if (len == 0 || len > PAGE_SIZE * 512)
+		return -EINVAL;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->length = len;
+	priv->memory = kzalloc(len, GFP_KERNEL);
+	if (!priv->memory) {
+		rc = -ENOMEM;
+		goto err_free;
+	}
+
+	exp_info.ops = &iommufd_test_dmabuf_ops;
+	exp_info.size = len;
+	exp_info.flags = open_flags;
+	exp_info.priv = priv;
+
+	dmabuf = dma_buf_export(&exp_info);
+	if (IS_ERR(dmabuf)) {
+		rc = PTR_ERR(dmabuf);
+		goto err_free;
+	}
+
+	return dma_buf_fd(dmabuf, open_flags);
+
+err_free:
+	kfree(priv->memory);
+	kfree(priv);
+	return rc;
+}
+
+static int iommufd_test_dmabuf_revoke(struct iommufd_ucmd *ucmd, int fd,
+				      bool revoked)
+{
+	struct iommufd_test_dma_buf *priv;
+	struct dma_buf *dmabuf;
+	int rc = 0;
+
+	dmabuf = dma_buf_get(fd);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	if (dmabuf->ops != &iommufd_test_dmabuf_ops) {
+		rc = -EOPNOTSUPP;
+		goto err_put;
+	}
+
+	priv = dmabuf->priv;
+	dma_resv_lock(dmabuf->resv, NULL);
+	priv->revoked = revoked;
+	dma_buf_move_notify(dmabuf);
+	dma_resv_unlock(dmabuf->resv);
+
+err_put:
+	dma_buf_put(dmabuf);
+	return rc;
+}
+
 int iommufd_test(struct iommufd_ucmd *ucmd)
 {
 	struct iommu_test_cmd *cmd = ucmd->cmd;
@@ -2109,6 +2159,13 @@ int iommufd_test(struct iommufd_ucmd *ucmd)
 		return iommufd_test_pasid_detach(ucmd, cmd);
 	case IOMMU_TEST_OP_PASID_CHECK_HWPT:
 		return iommufd_test_pasid_check_hwpt(ucmd, cmd);
+	case IOMMU_TEST_OP_DMABUF_GET:
+		return iommufd_test_dmabuf_get(ucmd, cmd->dmabuf_get.open_flags,
+					       cmd->dmabuf_get.length);
+	case IOMMU_TEST_OP_DMABUF_REVOKE:
+		return iommufd_test_dmabuf_revoke(ucmd,
+						  cmd->dmabuf_revoke.dmabuf_fd,
+						  cmd->dmabuf_revoke.revoked);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -2202,3 +2259,5 @@ void iommufd_test_exit(void)
 	platform_device_unregister(selftest_iommu_dev);
 	debugfs_remove_recursive(dbgfs_root);
 }
+
+MODULE_IMPORT_NS("GENERIC_PT_IOMMU");

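Taken together, the new test ops let the whole dmabuf import path be exercised without VFIO hardware. A plausible end-to-end flow, with the ioctl plumbing behind hypothetical helper names:

	/* 1. exporter side: a kernel-memory-backed test dmabuf */
	dmabuf_fd = test_op_dmabuf_get(iommufd, length, O_CLOEXEC);

	/* 2. import: map it into an IOAS like any other fd */
	map_file(iommufd, ioas_id, dmabuf_fd, 0, length, &iova);

	/* 3. revoke: mappings drop via move_notify, IOVA stays reserved */
	test_op_dmabuf_revoke(iommufd, dmabuf_fd, true);

	/* 4. un-revoke; re-filling domains repopulates the mappings */
	test_op_dmabuf_revoke(iommufd, dmabuf_fd, false);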