26 files changed, 411 insertions, 67 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 398045c00495..28d6fd75d43a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -27606,8 +27606,9 @@ F:	include/uapi/linux/vfio.h
 F:	tools/testing/selftests/vfio/
 
 VFIO FSL-MC DRIVER
+M:	Ioana Ciornei <ioana.ciornei@nxp.com>
 L:	kvm@vger.kernel.org
-S:	Obsolete
+S:	Maintained
 F:	drivers/vfio/fsl-mc/
 
 VFIO HISILICON PCI DRIVER
diff --git a/drivers/dma-buf/dma-buf-mapping.c b/drivers/dma-buf/dma-buf-mapping.c
index b7352e609fbd..174677faa577 100644
--- a/drivers/dma-buf/dma-buf-mapping.c
+++ b/drivers/dma-buf/dma-buf-mapping.c
@@ -33,8 +33,8 @@ static struct scatterlist *fill_sg_entry(struct scatterlist *sgl, size_t length,
 }
 
 static unsigned int calc_sg_nents(struct dma_iova_state *state,
-				  struct dma_buf_phys_vec *phys_vec,
-				  size_t nr_ranges, size_t size)
+				  struct phys_vec *phys_vec, size_t nr_ranges,
+				  size_t size)
 {
 	unsigned int nents = 0;
 	size_t i;
@@ -91,7 +91,7 @@ struct dma_buf_dma {
  */
 struct sg_table *dma_buf_phys_vec_to_sgt(struct dma_buf_attachment *attach,
 					 struct p2pdma_provider *provider,
-					 struct dma_buf_phys_vec *phys_vec,
+					 struct phys_vec *phys_vec,
 					 size_t nr_ranges, size_t size,
 					 enum dma_data_direction dir)
 {
diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
index 14cd052fd320..27e3e311d395 100644
--- a/drivers/iommu/iommufd/io_pagetable.h
+++ b/drivers/iommu/iommufd/io_pagetable.h
@@ -202,7 +202,7 @@ struct iopt_pages_dmabuf_track {
 
 struct iopt_pages_dmabuf {
 	struct dma_buf_attachment *attach;
-	struct dma_buf_phys_vec phys;
+	struct phys_vec phys;
 	/* Always PAGE_SIZE aligned */
 	unsigned long start;
 	struct list_head tracker;
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index eb6d1a70f673..6ac1965199e9 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -20,7 +20,6 @@ struct iommu_group;
 struct iommu_option;
 struct iommufd_device;
 struct dma_buf_attachment;
-struct dma_buf_phys_vec;
 
 struct iommufd_sw_msi_map {
 	struct list_head sw_msi_item;
@@ -718,7 +717,7 @@ int __init iommufd_test_init(void);
 void iommufd_test_exit(void);
 bool iommufd_selftest_is_mock_dev(struct device *dev);
 int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
-				     struct dma_buf_phys_vec *phys);
+				     struct phys_vec *phys);
 #else
 static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
 						 unsigned int ioas_id,
@@ -742,7 +741,7 @@ static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
 }
 
 static inline int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
-						   struct dma_buf_phys_vec *phys)
+						   struct phys_vec *phys)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index f606148920fa..f863fea75b98 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -1078,7 +1078,7 @@ static int pfn_reader_user_update_pinned(struct pfn_reader_user *user,
 }
 
 struct pfn_reader_dmabuf {
-	struct dma_buf_phys_vec phys;
+	struct phys_vec phys;
 	unsigned long start_offset;
 };
 
@@ -1461,7 +1461,7 @@ static struct dma_buf_attach_ops iopt_dmabuf_attach_revoke_ops = {
  */
 static int
 sym_vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
-				 struct dma_buf_phys_vec *phys)
+				 struct phys_vec *phys)
 {
 	typeof(&vfio_pci_dma_buf_iommufd_map) fn;
 	int rc;
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 550ff36dec3a..989d8c4c60a7 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -2002,7 +2002,7 @@ static const struct dma_buf_ops iommufd_test_dmabuf_ops = {
 };
 
 int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
-				     struct dma_buf_phys_vec *phys)
+				     struct phys_vec *phys)
 {
 	struct iommufd_test_dma_buf *priv = attachment->dmabuf->priv;
 
diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig
index 43c145d17971..7d1d690348f0 100644
--- a/drivers/vfio/fsl-mc/Kconfig
+++ b/drivers/vfio/fsl-mc/Kconfig
@@ -2,12 +2,9 @@ menu "VFIO support for FSL_MC bus devices"
 	depends on FSL_MC_BUS
 
 config VFIO_FSL_MC
-	tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices (DEPRECATED)"
+	tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"
 	select EVENTFD
 	help
-	  The vfio-fsl-mc driver is deprecated and will be removed in a
-	  future kernel release.
-
 	  Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc
 	  (Management Complex) devices. This is required to passthrough
 	  fsl-mc bus devices using the VFIO framework.
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index ba47100f28c1..3985613e6830 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -531,8 +531,6 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
 	struct device *dev = &mc_dev->dev;
 	int ret;
 
-	dev_err_once(dev, "DEPRECATION: vfio-fsl-mc is deprecated and will be removed in a future kernel release\n");
-
 	vdev = vfio_alloc_device(vfio_fsl_mc_device, vdev, dev,
 				 &vfio_fsl_mc_ops);
 	if (IS_ERR(vdev))
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index e44bb44c581e..b2596020e62f 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -156,7 +156,7 @@ static void mdev_type_release(struct kobject *kobj)
 	struct mdev_type *type = to_mdev_type(kobj);
 
 	pr_debug("Releasing group %s\n", kobj->name);
-	/* Pairs with the get in add_mdev_supported_type() */
+	/* Pairs with the get in mdev_type_add() */
 	put_device(type->parent->dev);
 }
 
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index cf45f6370c36..e61df3fe0db9 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -426,7 +426,7 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
 	ret = qm_get_vft(vf_qm, &vf_qm->qp_base);
 	if (ret <= 0) {
 		dev_err(dev, "failed to get vft qp nums\n");
-		return ret;
+		return ret < 0 ? ret : -EINVAL;
 	}
 
 	if (ret != vf_data->qp_num) {
@@ -1188,12 +1188,34 @@ hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
 	return 0;
 }
 
+static void hisi_acc_vf_pci_reset_prepare(struct pci_dev *pdev)
+{
+	struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
+	struct hisi_qm *qm = hisi_acc_vdev->pf_qm;
+	struct device *dev = &qm->pdev->dev;
+	u32 delay = 0;
+
+	/* All reset requests need to be queued for processing */
+	while (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
+		msleep(1);
+		if (++delay > QM_RESET_WAIT_TIMEOUT) {
+			dev_err(dev, "reset prepare failed\n");
+			return;
+		}
+	}
+
+	hisi_acc_vdev->set_reset_flag = true;
+}
+
 static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev)
 {
 	struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
+	struct hisi_qm *qm = hisi_acc_vdev->pf_qm;
+
+	if (hisi_acc_vdev->set_reset_flag)
+		clear_bit(QM_RESETTING, &qm->misc_ctl);
 
-	if (hisi_acc_vdev->core_device.vdev.migration_flags !=
-	    VFIO_MIGRATION_STOP_COPY)
+	if (!hisi_acc_vdev->core_device.vdev.mig_ops)
 		return;
 
 	mutex_lock(&hisi_acc_vdev->state_mutex);
@@ -1547,6 +1569,7 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
 		}
 		hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
 		hisi_acc_vdev->dev_opened = true;
+		hisi_acc_vdev->match_done = 0;
 		mutex_unlock(&hisi_acc_vdev->open_mutex);
 	}
 
@@ -1734,6 +1757,7 @@ static const struct pci_device_id hisi_acc_vfio_pci_table[] = {
 MODULE_DEVICE_TABLE(pci, hisi_acc_vfio_pci_table);
 
 static const struct pci_error_handlers hisi_acc_vf_err_handlers = {
+	.reset_prepare = hisi_acc_vf_pci_reset_prepare,
 	.reset_done = hisi_acc_vf_pci_aer_reset_done,
 	.error_detected = vfio_pci_core_aer_err_detected,
 };
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
index cd55eba64dfb..a3d91a31e3d8 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -27,6 +27,7 @@
 #define ERROR_CHECK_TIMEOUT	100
 #define CHECK_DELAY_TIME	100
+#define QM_RESET_WAIT_TIMEOUT	60000
 
 #define QM_SQC_VFT_BASE_SHIFT_V2	28
 #define QM_SQC_VFT_BASE_MASK_V2		GENMASK(15, 0)
@@ -128,6 +129,7 @@ struct hisi_acc_vf_migration_file {
 struct hisi_acc_vf_core_device {
 	struct vfio_pci_core_device core_device;
 	u8 match_done;
+	bool set_reset_flag;
 	/*
 	 * io_base is only valid when dev_opened is true,
 	 * which is protected by open_mutex.
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index b45a24d00387..fa056b69f899 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -9,6 +9,7 @@
 #include <linux/jiffies.h>
 #include <linux/pci-p2pdma.h>
 #include <linux/pm_runtime.h>
+#include <linux/memory-failure.h>
 
 /*
  * The device memory usable to the workloads running in the VM is cached
@@ -49,6 +50,7 @@ struct mem_region {
 		void *memaddr;
 		void __iomem *ioaddr;
 	};			/* Base virtual address of the region */
+	struct pfn_address_space pfn_address_space;
 };
 
 struct nvgrace_gpu_pci_core_device {
@@ -88,6 +90,80 @@ nvgrace_gpu_memregion(int index,
 	return NULL;
 }
 
+static int pfn_memregion_offset(struct nvgrace_gpu_pci_core_device *nvdev,
+				unsigned int index,
+				unsigned long pfn,
+				pgoff_t *pfn_offset_in_region)
+{
+	struct mem_region *region;
+	unsigned long start_pfn, num_pages;
+
+	region = nvgrace_gpu_memregion(index, nvdev);
+	if (!region)
+		return -EINVAL;
+
+	start_pfn = PHYS_PFN(region->memphys);
+	num_pages = region->memlength >> PAGE_SHIFT;
+
+	if (pfn < start_pfn || pfn >= start_pfn + num_pages)
+		return -EFAULT;
+
+	*pfn_offset_in_region = pfn - start_pfn;
+
+	return 0;
+}
+
+static inline
+struct nvgrace_gpu_pci_core_device *vma_to_nvdev(struct vm_area_struct *vma);
+
+static int nvgrace_gpu_pfn_to_vma_pgoff(struct vm_area_struct *vma,
+					unsigned long pfn,
+					pgoff_t *pgoff)
+{
+	struct nvgrace_gpu_pci_core_device *nvdev;
+	unsigned int index =
+		vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+	pgoff_t vma_offset_in_region = vma->vm_pgoff &
+		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+	pgoff_t pfn_offset_in_region;
+	int ret;
+
+	nvdev = vma_to_nvdev(vma);
+	if (!nvdev)
+		return -ENOENT;
+
+	ret = pfn_memregion_offset(nvdev, index, pfn, &pfn_offset_in_region);
+	if (ret)
+		return ret;
+
+	/* Ensure PFN is not before VMA's start within the region */
+	if (pfn_offset_in_region < vma_offset_in_region)
+		return -EFAULT;
+
+	/* Calculate offset from VMA start */
+	*pgoff = vma->vm_pgoff +
+		 (pfn_offset_in_region - vma_offset_in_region);
+
+	return 0;
+}
+
+static int
+nvgrace_gpu_vfio_pci_register_pfn_range(struct vfio_device *core_vdev,
+					struct mem_region *region)
+{
+	unsigned long pfn, nr_pages;
+
+	pfn = PHYS_PFN(region->memphys);
+	nr_pages = region->memlength >> PAGE_SHIFT;
+
+	region->pfn_address_space.node.start = pfn;
+	region->pfn_address_space.node.last = pfn + nr_pages - 1;
+	region->pfn_address_space.mapping = core_vdev->inode->i_mapping;
+	region->pfn_address_space.pfn_to_vma_pgoff = nvgrace_gpu_pfn_to_vma_pgoff;
+
+	return register_pfn_address_space(&region->pfn_address_space);
+}
+
 static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
 {
 	struct vfio_pci_core_device *vdev =
@@ -114,14 +190,28 @@ static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
 	 * memory mapping.
 	 */
 	ret = vfio_pci_core_setup_barmap(vdev, 0);
-	if (ret) {
-		vfio_pci_core_disable(vdev);
-		return ret;
+	if (ret)
+		goto error_exit;
+
+	if (nvdev->resmem.memlength) {
+		ret = nvgrace_gpu_vfio_pci_register_pfn_range(core_vdev, &nvdev->resmem);
+		if (ret && ret != -EOPNOTSUPP)
+			goto error_exit;
 	}
 
-	vfio_pci_core_finish_enable(vdev);
+	ret = nvgrace_gpu_vfio_pci_register_pfn_range(core_vdev, &nvdev->usemem);
+	if (ret && ret != -EOPNOTSUPP)
+		goto register_mem_failed;
 
+	vfio_pci_core_finish_enable(vdev);
 	return 0;
+
+register_mem_failed:
+	if (nvdev->resmem.memlength)
+		unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
+error_exit:
+	vfio_pci_core_disable(vdev);
+	return ret;
 }
 
 static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
@@ -130,6 +220,11 @@ static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
 		container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
 			     core_device.vdev);
 
+	if (nvdev->resmem.memlength)
+		unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
+
+	unregister_pfn_address_space(&nvdev->usemem.pfn_address_space);
+
 	/* Unmap the mapping to the device memory cached region */
 	if (nvdev->usemem.memaddr) {
 		memunmap(nvdev->usemem.memaddr);
@@ -247,6 +342,16 @@ static const struct vm_operations_struct nvgrace_gpu_vfio_pci_mmap_ops = {
 #endif
 };
 
+static inline
+struct nvgrace_gpu_pci_core_device *vma_to_nvdev(struct vm_area_struct *vma)
+{
+	/* Check if this VMA belongs to us */
+	if (vma->vm_ops != &nvgrace_gpu_vfio_pci_mmap_ops)
+		return NULL;
+
+	return vma->vm_private_data;
+}
+
 static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
 			    struct vm_area_struct *vma)
 {
@@ -784,7 +889,7 @@ nvgrace_gpu_write(struct vfio_device *core_vdev,
 static int nvgrace_get_dmabuf_phys(struct vfio_pci_core_device *core_vdev,
 				   struct p2pdma_provider **provider,
 				   unsigned int region_index,
-				   struct dma_buf_phys_vec *phys_vec,
+				   struct phys_vec *phys_vec,
 				   struct vfio_region_dma_range *dma_ranges,
 				   size_t nr_ranges)
 {
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 3a11e6f450f7..72c33b399800 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -588,6 +588,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_enable);
 
 void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
 {
+	struct pci_dev *bridge;
 	struct pci_dev *pdev = vdev->pdev;
 	struct vfio_pci_dummy_resource *dummy_res, *tmp;
 	struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
@@ -694,12 +695,20 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
 	 * We can not use the "try" reset interface here, which will
 	 * overwrite the previously restored configuration information.
 	 */
-	if (vdev->reset_works && pci_dev_trylock(pdev)) {
-		if (!__pci_reset_function_locked(pdev))
-			vdev->needs_reset = false;
-		pci_dev_unlock(pdev);
+	if (vdev->reset_works) {
+		bridge = pci_upstream_bridge(pdev);
+		if (bridge && !pci_dev_trylock(bridge))
+			goto out_restore_state;
+		if (pci_dev_trylock(pdev)) {
+			if (!__pci_reset_function_locked(pdev))
+				vdev->needs_reset = false;
+			pci_dev_unlock(pdev);
+		}
+		if (bridge)
+			pci_dev_unlock(bridge);
 	}
 
+out_restore_state:
 	pci_restore_state(pdev);
 out:
 	pci_disable_device(pdev);
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index 4be4a85005cb..9918713d5774 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -14,7 +14,7 @@ struct vfio_pci_dma_buf {
 	struct vfio_pci_core_device *vdev;
 	struct list_head dmabufs_elm;
 	size_t size;
-	struct dma_buf_phys_vec *phys_vec;
+	struct phys_vec *phys_vec;
 	struct p2pdma_provider *provider;
 	u32 nr_ranges;
 	u8 revoked : 1;
@@ -106,7 +106,7 @@ static const struct dma_buf_ops vfio_pci_dmabuf_ops = {
 * will fail if it is currently revoked
 */
 int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
-				 struct dma_buf_phys_vec *phys)
+				 struct phys_vec *phys)
 {
 	struct vfio_pci_dma_buf *priv;
 
@@ -128,7 +128,7 @@ int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
 }
 EXPORT_SYMBOL_FOR_MODULES(vfio_pci_dma_buf_iommufd_map, "iommufd");
 
-int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
+int vfio_pci_core_fill_phys_vec(struct phys_vec *phys_vec,
				struct vfio_region_dma_range *dma_ranges,
				size_t nr_ranges, phys_addr_t start,
				phys_addr_t len)
@@ -160,7 +160,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_fill_phys_vec);
 int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
				  struct p2pdma_provider **provider,
				  unsigned int region_index,
-				  struct dma_buf_phys_vec *phys_vec,
+				  struct phys_vec *phys_vec,
				  struct vfio_region_dma_range *dma_ranges,
				  size_t nr_ranges)
 {
diff --git a/include/linux/dma-buf-mapping.h b/include/linux/dma-buf-mapping.h
index a3c0ce2d3a42..09bde3f748e4 100644
--- a/include/linux/dma-buf-mapping.h
+++ b/include/linux/dma-buf-mapping.h
@@ -9,7 +9,7 @@
 
 struct sg_table *dma_buf_phys_vec_to_sgt(struct dma_buf_attachment *attach,
					 struct p2pdma_provider *provider,
-					 struct dma_buf_phys_vec *phys_vec,
+					 struct phys_vec *phys_vec,
					 size_t nr_ranges, size_t size,
					 enum dma_data_direction dir);
 void dma_buf_free_sgt(struct dma_buf_attachment *attach, struct sg_table *sgt,
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 91f4939db89b..133b9e637b55 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -520,16 +520,6 @@ struct dma_buf_export_info {
 };
 
 /**
- * struct dma_buf_phys_vec - describe continuous chunk of memory
- * @paddr: physical address of that chunk
- * @len: Length of this chunk
- */
-struct dma_buf_phys_vec {
-	phys_addr_t paddr;
-	size_t len;
-};
-
-/**
 * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters
 * @name: export-info name
 *
diff --git a/include/linux/memory-failure.h b/include/linux/memory-failure.h
index 7b5e11cf905f..d333dcdbeae7 100644
--- a/include/linux/memory-failure.h
+++ b/include/linux/memory-failure.h
@@ -4,8 +4,6 @@
 
 #include <linux/interval_tree.h>
 
-struct pfn_address_space;
-
 struct pfn_address_space {
	struct interval_tree_node node;
	struct address_space *mapping;
@@ -13,7 +11,18 @@ struct pfn_address_space {
			       unsigned long pfn, pgoff_t *pgoff);
 };
 
+#ifdef CONFIG_MEMORY_FAILURE
 int register_pfn_address_space(struct pfn_address_space *pfn_space);
 void unregister_pfn_address_space(struct pfn_address_space *pfn_space);
+#else
+static inline int register_pfn_address_space(struct pfn_address_space *pfn_space)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void unregister_pfn_address_space(struct pfn_address_space *pfn_space)
+{
+}
+#endif /* CONFIG_MEMORY_FAILURE */
 
 #endif /* _LINUX_MEMORY_FAILURE_H */
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 1ac86896875c..2ebba746c18f 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -28,7 +28,6 @@
 struct vfio_pci_core_device;
 struct vfio_pci_region;
 struct p2pdma_provider;
-struct dma_buf_phys_vec;
 struct dma_buf_attachment;
 
 struct vfio_pci_eventfd {
@@ -62,25 +61,25 @@ struct vfio_pci_device_ops {
	int (*get_dmabuf_phys)(struct vfio_pci_core_device *vdev,
			       struct p2pdma_provider **provider,
			       unsigned int region_index,
-			       struct dma_buf_phys_vec *phys_vec,
+			       struct phys_vec *phys_vec,
			       struct vfio_region_dma_range *dma_ranges,
			       size_t nr_ranges);
 };
 
 #if IS_ENABLED(CONFIG_VFIO_PCI_DMABUF)
-int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
+int vfio_pci_core_fill_phys_vec(struct phys_vec *phys_vec,
				struct vfio_region_dma_range *dma_ranges,
				size_t nr_ranges, phys_addr_t start,
				phys_addr_t len);
 int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
				  struct p2pdma_provider **provider,
				  unsigned int region_index,
-				  struct dma_buf_phys_vec *phys_vec,
+				  struct phys_vec *phys_vec,
				  struct vfio_region_dma_range *dma_ranges,
				  size_t nr_ranges);
 #else
 static inline int
-vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
+vfio_pci_core_fill_phys_vec(struct phys_vec *phys_vec,
			    struct vfio_region_dma_range *dma_ranges,
			    size_t nr_ranges, phys_addr_t start,
			    phys_addr_t len)
@@ -89,7 +88,7 @@ vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
 }
 static inline int vfio_pci_core_get_dmabuf_phys(
	struct vfio_pci_core_device *vdev, struct p2pdma_provider **provider,
-	unsigned int region_index, struct dma_buf_phys_vec *phys_vec,
+	unsigned int region_index, struct phys_vec *phys_vec,
	struct vfio_region_dma_range *dma_ranges, size_t nr_ranges)
 {
	return -EOPNOTSUPP;
@@ -236,6 +235,6 @@ static inline bool is_aligned_for_order(struct vm_area_struct *vma,
 }
 
 int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
-				 struct dma_buf_phys_vec *phys);
+				 struct phys_vec *phys);
 
 #endif /* VFIO_PCI_CORE_H */
diff --git a/tools/testing/selftests/vfio/Makefile b/tools/testing/selftests/vfio/Makefile
index 3c796ca99a50..8e90e409e91d 100644
--- a/tools/testing/selftests/vfio/Makefile
+++ b/tools/testing/selftests/vfio/Makefile
@@ -1,5 +1,13 @@
+ARCH ?= $(shell uname -m)
+
+ifeq (,$(filter $(ARCH),arm64 x86_64))
+# Do nothing on unsupported architectures
+include ../lib.mk
+else
+
 CFLAGS = $(KHDR_INCLUDES)
 TEST_GEN_PROGS += vfio_dma_mapping_test
+TEST_GEN_PROGS += vfio_dma_mapping_mmio_test
 TEST_GEN_PROGS += vfio_iommufd_setup_test
 TEST_GEN_PROGS += vfio_pci_device_test
 TEST_GEN_PROGS += vfio_pci_device_init_perf_test
@@ -27,3 +35,5 @@ TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_PROGS_O) $(LIBVFIO_O))
 -include $(TEST_DEP_FILES)
 
 EXTRA_CLEAN += $(TEST_GEN_PROGS_O) $(TEST_DEP_FILES)
+
+endif
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio.h b/tools/testing/selftests/vfio/lib/include/libvfio.h
index 279ddcd70194..1b6da54cc2cb 100644
--- a/tools/testing/selftests/vfio/lib/include/libvfio.h
+++ b/tools/testing/selftests/vfio/lib/include/libvfio.h
@@ -23,4 +23,13 @@
 const char *vfio_selftests_get_bdf(int *argc, char *argv[]);
 char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs);
 
+/*
+ * Reserve virtual address space of size at an address satisfying
+ * (vaddr % align) == offset.
+ *
+ * Returns the reserved vaddr. The caller is responsible for unmapping
+ * the returned region.
+ */
+void *mmap_reserve(size_t size, size_t align, size_t offset);
+
 #endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
index 5c9b9dc6d993..e9a3386a4719 100644
--- a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
@@ -61,6 +61,12 @@ iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr);
 
 struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges);
 
+#define MODE_VFIO_TYPE1_IOMMU		"vfio_type1_iommu"
+#define MODE_VFIO_TYPE1V2_IOMMU		"vfio_type1v2_iommu"
+#define MODE_IOMMUFD_COMPAT_TYPE1	"iommufd_compat_type1"
+#define MODE_IOMMUFD_COMPAT_TYPE1V2	"iommufd_compat_type1v2"
+#define MODE_IOMMUFD			"iommufd"
+
 /*
 * Generator for VFIO selftests fixture variants that replicate across all
 * possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE()
diff --git a/tools/testing/selftests/vfio/lib/iommu.c b/tools/testing/selftests/vfio/lib/iommu.c
index 58b7fb7430d4..035dac069d60 100644
--- a/tools/testing/selftests/vfio/lib/iommu.c
+++ b/tools/testing/selftests/vfio/lib/iommu.c
@@ -20,32 +20,32 @@
 #include "../../../kselftest.h"
 #include <libvfio.h>
 
-const char *default_iommu_mode = "iommufd";
+const char *default_iommu_mode = MODE_IOMMUFD;
 
 /* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
 static const struct iommu_mode iommu_modes[] = {
	{
-		.name = "vfio_type1_iommu",
+		.name = MODE_VFIO_TYPE1_IOMMU,
		.container_path = "/dev/vfio/vfio",
		.iommu_type = VFIO_TYPE1_IOMMU,
	},
	{
-		.name = "vfio_type1v2_iommu",
+		.name = MODE_VFIO_TYPE1V2_IOMMU,
		.container_path = "/dev/vfio/vfio",
		.iommu_type = VFIO_TYPE1v2_IOMMU,
	},
	{
-		.name = "iommufd_compat_type1",
+		.name = MODE_IOMMUFD_COMPAT_TYPE1,
		.container_path = "/dev/iommu",
		.iommu_type = VFIO_TYPE1_IOMMU,
	},
	{
-		.name = "iommufd_compat_type1v2",
+		.name = MODE_IOMMUFD_COMPAT_TYPE1V2,
		.container_path = "/dev/iommu",
		.iommu_type = VFIO_TYPE1v2_IOMMU,
	},
	{
-		.name = "iommufd",
+		.name = MODE_IOMMUFD,
	},
 };
 
diff --git a/tools/testing/selftests/vfio/lib/libvfio.c b/tools/testing/selftests/vfio/lib/libvfio.c
index a23a3cc5be69..3a3d1ed635c1 100644
--- a/tools/testing/selftests/vfio/lib/libvfio.c
+++ b/tools/testing/selftests/vfio/lib/libvfio.c
@@ -2,6 +2,9 @@
 #include <stdio.h>
 #include <stdlib.h>
+#include <sys/mman.h>
+
+#include <linux/align.h>
 
 #include "../../../kselftest.h"
 #include <libvfio.h>
 
@@ -76,3 +79,25 @@ const char *vfio_selftests_get_bdf(int *argc, char *argv[])
 
	return vfio_selftests_get_bdfs(argc, argv, &nr_bdfs)[0];
 }
+
+void *mmap_reserve(size_t size, size_t align, size_t offset)
+{
+	void *map_base, *map_align;
+	size_t delta;
+
+	VFIO_ASSERT_GT(align, offset);
+	delta = align - offset;
+
+	map_base = mmap(NULL, size + align, PROT_NONE,
+			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	VFIO_ASSERT_NE(map_base, MAP_FAILED);
+
+	map_align = (void *)(ALIGN((uintptr_t)map_base + delta, align) - delta);
+
+	if (map_align > map_base)
+		VFIO_ASSERT_EQ(munmap(map_base, map_align - map_base), 0);
+
+	VFIO_ASSERT_EQ(munmap(map_align + size, map_base + align - map_align), 0);
+
+	return map_align;
+}
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index fac4c0ecadef..4e5871f1ebc3 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -11,10 +11,14 @@
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 
+#include <linux/align.h>
 #include <linux/iommufd.h>
+#include <linux/kernel.h>
 #include <linux/limits.h>
+#include <linux/log2.h>
 #include <linux/mman.h>
 #include <linux/overflow.h>
+#include <linux/sizes.h>
 #include <linux/types.h>
 #include <linux/vfio.h>
 
@@ -123,20 +127,38 @@ static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
 static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
 {
	struct vfio_pci_bar *bar = &device->bars[index];
+	size_t align, size;
	int prot = 0;
+	void *vaddr;
 
	VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
	VFIO_ASSERT_NULL(bar->vaddr);
	VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);
+	VFIO_ASSERT_TRUE(is_power_of_2(bar->info.size));
 
	if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
		prot |= PROT_READ;
	if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
		prot |= PROT_WRITE;
 
-	bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED,
+	size = bar->info.size;
+
+	/*
+	 * Align BAR mmaps to improve page fault granularity during potential
+	 * subsequent IOMMU mapping of these BAR vaddr. 1G for x86 is the
+	 * largest hugepage size across any architecture, so no benefit from
+	 * larger alignment. BARs smaller than 1G will be aligned by their
+	 * power-of-two size, guaranteeing sufficient alignment for smaller
+	 * hugepages, if present.
+	 */
+	align = min_t(size_t, size, SZ_1G);
+
+	vaddr = mmap_reserve(size, align, 0);
+	bar->vaddr = mmap(vaddr, size, prot, MAP_SHARED | MAP_FIXED,
			  device->fd, bar->info.offset);
	VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
+
+	madvise(bar->vaddr, size, MADV_HUGEPAGE);
 }
 
 static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_mmio_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_mmio_test.c
new file mode 100644
index 000000000000..957a89ce7b3a
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_mmio_test.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <uapi/linux/types.h>
+#include <linux/pci_regs.h>
+#include <linux/sizes.h>
+#include <linux/vfio.h>
+
+#include <libvfio.h>
+
+#include "../kselftest_harness.h"
+
+static const char *device_bdf;
+
+static struct vfio_pci_bar *largest_mapped_bar(struct vfio_pci_device *device)
+{
+	u32 flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE;
+	struct vfio_pci_bar *largest = NULL;
+	u64 bar_size = 0;
+
+	for (int i = 0; i < PCI_STD_NUM_BARS; i++) {
+		struct vfio_pci_bar *bar = &device->bars[i];
+
+		if (!bar->vaddr)
+			continue;
+
+		/*
+		 * iommu_map() maps with READ|WRITE, so require the same
+		 * abilities for the underlying VFIO region.
+		 */
+		if ((bar->info.flags & flags) != flags)
+			continue;
+
+		if (bar->info.size > bar_size) {
+			bar_size = bar->info.size;
+			largest = bar;
+		}
+	}
+
+	return largest;
+}
+
+FIXTURE(vfio_dma_mapping_mmio_test) {
+	struct iommu *iommu;
+	struct vfio_pci_device *device;
+	struct iova_allocator *iova_allocator;
+	struct vfio_pci_bar *bar;
+};
+
+FIXTURE_VARIANT(vfio_dma_mapping_mmio_test) {
+	const char *iommu_mode;
+};
+
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode)			\
+FIXTURE_VARIANT_ADD(vfio_dma_mapping_mmio_test, _iommu_mode) {		\
+	.iommu_mode = #_iommu_mode,					\
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
+
+#undef FIXTURE_VARIANT_ADD_IOMMU_MODE
+
+FIXTURE_SETUP(vfio_dma_mapping_mmio_test)
+{
+	self->iommu = iommu_init(variant->iommu_mode);
+	self->device = vfio_pci_device_init(device_bdf, self->iommu);
+	self->iova_allocator = iova_allocator_init(self->iommu);
+	self->bar = largest_mapped_bar(self->device);
+
+	if (!self->bar)
+		SKIP(return, "No mappable BAR found on device %s", device_bdf);
+}
+
+FIXTURE_TEARDOWN(vfio_dma_mapping_mmio_test)
+{
+	iova_allocator_cleanup(self->iova_allocator);
+	vfio_pci_device_cleanup(self->device);
+	iommu_cleanup(self->iommu);
+}
+
+static void do_mmio_map_test(struct iommu *iommu,
+			     struct iova_allocator *iova_allocator,
+			     void *vaddr, size_t size)
+{
+	struct dma_region region = {
+		.vaddr = vaddr,
+		.size = size,
+		.iova = iova_allocator_alloc(iova_allocator, size),
+	};
+
+	/*
+	 * NOTE: Check for iommufd compat success once it lands. Native iommufd
+	 * will never support this.
+	 */
+	if (!strcmp(iommu->mode->name, MODE_VFIO_TYPE1V2_IOMMU) ||
+	    !strcmp(iommu->mode->name, MODE_VFIO_TYPE1_IOMMU)) {
+		iommu_map(iommu, &region);
+		iommu_unmap(iommu, &region);
+	} else {
+		VFIO_ASSERT_NE(__iommu_map(iommu, &region), 0);
+		VFIO_ASSERT_NE(__iommu_unmap(iommu, &region, NULL), 0);
+	}
+}
+
+TEST_F(vfio_dma_mapping_mmio_test, map_full_bar)
+{
+	do_mmio_map_test(self->iommu, self->iova_allocator,
+			 self->bar->vaddr, self->bar->info.size);
+}
+
+TEST_F(vfio_dma_mapping_mmio_test, map_partial_bar)
+{
+	if (self->bar->info.size < 2 * getpagesize())
+		SKIP(return, "BAR too small (size=0x%llx)", self->bar->info.size);
+
+	do_mmio_map_test(self->iommu, self->iova_allocator,
+			 self->bar->vaddr, getpagesize());
+}
+
+/* Test IOMMU mapping of BAR mmap with intentionally poor vaddr alignment. */
+TEST_F(vfio_dma_mapping_mmio_test, map_bar_misaligned)
+{
+	/* Limit size to bound test time for large BARs */
+	size_t size = min_t(size_t, self->bar->info.size, SZ_1G);
+	void *vaddr;
+
+	vaddr = mmap_reserve(size, SZ_1G, getpagesize());
+	vaddr = mmap(vaddr, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED,
+		     self->device->fd, self->bar->info.offset);
+	VFIO_ASSERT_NE(vaddr, MAP_FAILED);
+
+	do_mmio_map_test(self->iommu, self->iova_allocator, vaddr, size);
+
+	VFIO_ASSERT_EQ(munmap(vaddr, size), 0);
+}
+
+int main(int argc, char *argv[])
+{
+	device_bdf = vfio_selftests_get_bdf(&argc, argv);
+	return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
index 3bf984b337ac..abb170bdcef7 100644
--- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -161,12 +161,8 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
	if (rc == -EOPNOTSUPP)
		goto unmap;
 
-	/*
-	 * IOMMUFD compatibility-mode does not support huge mappings when
-	 * using VFIO_TYPE1_IOMMU.
-	 */
-	if (!strcmp(variant->iommu_mode, "iommufd_compat_type1"))
-		mapping_size = SZ_4K;
+	if (self->iommu->mode->iommu_type == VFIO_TYPE1_IOMMU)
+		goto unmap;
 
	ASSERT_EQ(0, rc);
	printf("Found IOMMU mappings for IOVA 0x%lx:\n", region.iova);
