summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-02-12 15:52:39 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-02-12 15:52:39 -0800
commit cebcffe666cc82e68842e27852a019ca54072cb7 (patch)
tree 92f6abbab9aacbfb1babd42614a8513bf2f0371c
parent d4a379a52c3c2dc44366c4f6722c063a7d0de179 (diff)
parent 96ca4caf9066f5ebd35b561a521af588a8eb0215 (diff)
Merge tag 'vfio-v7.0-rc1' of https://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:
 "A small cycle with the bulk in selftests and reintroducing poison
  handling in the nvgrace-gpu driver. The rest are fixes, cleanups, and
  some dmabuf structure consolidation.

   - Update outdated mdev comment referencing the renamed
     mdev_type_add() function (Julia Lawall)

   - Introduce selftest support for IOMMU mapping of PCI MMIO BARs
     (Alex Mastro)

   - Relax selftest assertion relative to differences in huge page
     handling between legacy (v1) TYPE1 IOMMU mapping behavior and the
     compatibility mode supported by IOMMUFD (David Matlack)

   - Reintroduce memory poison handling support for
     non-struct-page-backed memory in the nvgrace-gpu variant driver
     (Ankit Agrawal)

   - Replace dma_buf_phys_vec with phys_vec to avoid duplicate structure
     and semantics (Leon Romanovsky)

   - Add missing upstream bridge locking across PCI function reset,
     resolving an assertion failure when secondary bus reset is used to
     provide that reset (Anthony Pighin)

   - Fixes to hisi_acc vfio-pci variant driver to resolve corner case
     issues related to resets, repeated migration, and error injection
     scenarios (Longfang Liu, Weili Qian)

   - Restrict vfio selftest builds to arm64 and x86_64, resolving
     compiler warnings on 32-bit archs (Ted Logan)

   - Un-deprecate the fsl-mc vfio bus driver as a new maintainer has
     stepped up (Ioana Ciornei)"

* tag 'vfio-v7.0-rc1' of https://github.com/awilliam/linux-vfio:
  vfio/fsl-mc: add myself as maintainer
  vfio: selftests: only build tests on arm64 and x86_64
  hisi_acc_vfio_pci: fix the queue parameter anomaly issue
  hisi_acc_vfio_pci: resolve duplicate migration states
  hisi_acc_vfio_pci: update status after RAS error
  hisi_acc_vfio_pci: fix VF reset timeout issue
  vfio/pci: Lock upstream bridge for vfio_pci_core_disable()
  types: reuse common phys_vec type instead of DMABUF open‑coded variant
  vfio/nvgrace-gpu: register device memory for poison handling
  mm: add stubs for PFNMAP memory failure registration functions
  vfio: selftests: Drop IOMMU mapping size assertions for VFIO_TYPE1_IOMMU
  vfio: selftests: Add vfio_dma_mapping_mmio_test
  vfio: selftests: Align BAR mmaps for efficient IOMMU mapping
  vfio: selftests: Centralize IOMMU mode name definitions
  vfio/mdev: update outdated comment

-rw-r--r-- MAINTAINERS 3
-rw-r--r-- drivers/dma-buf/dma-buf-mapping.c 6
-rw-r--r-- drivers/iommu/iommufd/io_pagetable.h 2
-rw-r--r-- drivers/iommu/iommufd/iommufd_private.h 5
-rw-r--r-- drivers/iommu/iommufd/pages.c 4
-rw-r--r-- drivers/iommu/iommufd/selftest.c 2
-rw-r--r-- drivers/vfio/fsl-mc/Kconfig 5
-rw-r--r-- drivers/vfio/fsl-mc/vfio_fsl_mc.c 2
-rw-r--r-- drivers/vfio/mdev/mdev_sysfs.c 2
-rw-r--r-- drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c 30
-rw-r--r-- drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h 2
-rw-r--r-- drivers/vfio/pci/nvgrace-gpu/main.c 115
-rw-r--r-- drivers/vfio/pci/vfio_pci_core.c 17
-rw-r--r-- drivers/vfio/pci/vfio_pci_dmabuf.c 8
-rw-r--r-- include/linux/dma-buf-mapping.h 2
-rw-r--r-- include/linux/dma-buf.h 10
-rw-r--r-- include/linux/memory-failure.h 13
-rw-r--r-- include/linux/vfio_pci_core.h 13
-rw-r--r-- tools/testing/selftests/vfio/Makefile 10
-rw-r--r-- tools/testing/selftests/vfio/lib/include/libvfio.h 9
-rw-r--r-- tools/testing/selftests/vfio/lib/include/libvfio/iommu.h 6
-rw-r--r-- tools/testing/selftests/vfio/lib/iommu.c 12
-rw-r--r-- tools/testing/selftests/vfio/lib/libvfio.c 25
-rw-r--r-- tools/testing/selftests/vfio/lib/vfio_pci_device.c 24
-rw-r--r-- tools/testing/selftests/vfio/vfio_dma_mapping_mmio_test.c 143
-rw-r--r-- tools/testing/selftests/vfio/vfio_dma_mapping_test.c 8
26 files changed, 411 insertions, 67 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 398045c00495..28d6fd75d43a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -27606,8 +27606,9 @@ F: include/uapi/linux/vfio.h
F: tools/testing/selftests/vfio/
VFIO FSL-MC DRIVER
+M: Ioana Ciornei <ioana.ciornei@nxp.com>
L: kvm@vger.kernel.org
-S: Obsolete
+S: Maintained
F: drivers/vfio/fsl-mc/
VFIO HISILICON PCI DRIVER
diff --git a/drivers/dma-buf/dma-buf-mapping.c b/drivers/dma-buf/dma-buf-mapping.c
index b7352e609fbd..174677faa577 100644
--- a/drivers/dma-buf/dma-buf-mapping.c
+++ b/drivers/dma-buf/dma-buf-mapping.c
@@ -33,8 +33,8 @@ static struct scatterlist *fill_sg_entry(struct scatterlist *sgl, size_t length,
}
static unsigned int calc_sg_nents(struct dma_iova_state *state,
- struct dma_buf_phys_vec *phys_vec,
- size_t nr_ranges, size_t size)
+ struct phys_vec *phys_vec, size_t nr_ranges,
+ size_t size)
{
unsigned int nents = 0;
size_t i;
@@ -91,7 +91,7 @@ struct dma_buf_dma {
*/
struct sg_table *dma_buf_phys_vec_to_sgt(struct dma_buf_attachment *attach,
struct p2pdma_provider *provider,
- struct dma_buf_phys_vec *phys_vec,
+ struct phys_vec *phys_vec,
size_t nr_ranges, size_t size,
enum dma_data_direction dir)
{
diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
index 14cd052fd320..27e3e311d395 100644
--- a/drivers/iommu/iommufd/io_pagetable.h
+++ b/drivers/iommu/iommufd/io_pagetable.h
@@ -202,7 +202,7 @@ struct iopt_pages_dmabuf_track {
struct iopt_pages_dmabuf {
struct dma_buf_attachment *attach;
- struct dma_buf_phys_vec phys;
+ struct phys_vec phys;
/* Always PAGE_SIZE aligned */
unsigned long start;
struct list_head tracker;
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index eb6d1a70f673..6ac1965199e9 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -20,7 +20,6 @@ struct iommu_group;
struct iommu_option;
struct iommufd_device;
struct dma_buf_attachment;
-struct dma_buf_phys_vec;
struct iommufd_sw_msi_map {
struct list_head sw_msi_item;
@@ -718,7 +717,7 @@ int __init iommufd_test_init(void);
void iommufd_test_exit(void);
bool iommufd_selftest_is_mock_dev(struct device *dev);
int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
- struct dma_buf_phys_vec *phys);
+ struct phys_vec *phys);
#else
static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
unsigned int ioas_id,
@@ -742,7 +741,7 @@ static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
}
static inline int
iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
- struct dma_buf_phys_vec *phys)
+ struct phys_vec *phys)
{
return -EOPNOTSUPP;
}
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index f606148920fa..f863fea75b98 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -1078,7 +1078,7 @@ static int pfn_reader_user_update_pinned(struct pfn_reader_user *user,
}
struct pfn_reader_dmabuf {
- struct dma_buf_phys_vec phys;
+ struct phys_vec phys;
unsigned long start_offset;
};
@@ -1461,7 +1461,7 @@ static struct dma_buf_attach_ops iopt_dmabuf_attach_revoke_ops = {
*/
static int
sym_vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
- struct dma_buf_phys_vec *phys)
+ struct phys_vec *phys)
{
typeof(&vfio_pci_dma_buf_iommufd_map) fn;
int rc;
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 550ff36dec3a..989d8c4c60a7 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -2002,7 +2002,7 @@ static const struct dma_buf_ops iommufd_test_dmabuf_ops = {
};
int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
- struct dma_buf_phys_vec *phys)
+ struct phys_vec *phys)
{
struct iommufd_test_dma_buf *priv = attachment->dmabuf->priv;
diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig
index 43c145d17971..7d1d690348f0 100644
--- a/drivers/vfio/fsl-mc/Kconfig
+++ b/drivers/vfio/fsl-mc/Kconfig
@@ -2,12 +2,9 @@ menu "VFIO support for FSL_MC bus devices"
depends on FSL_MC_BUS
config VFIO_FSL_MC
- tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices (DEPRECATED)"
+ tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"
select EVENTFD
help
- The vfio-fsl-mc driver is deprecated and will be removed in a
- future kernel release.
-
Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc
(Management Complex) devices. This is required to passthrough
fsl-mc bus devices using the VFIO framework.
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index ba47100f28c1..3985613e6830 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -531,8 +531,6 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
struct device *dev = &mc_dev->dev;
int ret;
- dev_err_once(dev, "DEPRECATION: vfio-fsl-mc is deprecated and will be removed in a future kernel release\n");
-
vdev = vfio_alloc_device(vfio_fsl_mc_device, vdev, dev,
&vfio_fsl_mc_ops);
if (IS_ERR(vdev))
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index e44bb44c581e..b2596020e62f 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -156,7 +156,7 @@ static void mdev_type_release(struct kobject *kobj)
struct mdev_type *type = to_mdev_type(kobj);
pr_debug("Releasing group %s\n", kobj->name);
- /* Pairs with the get in add_mdev_supported_type() */
+ /* Pairs with the get in mdev_type_add() */
put_device(type->parent->dev);
}
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index cf45f6370c36..e61df3fe0db9 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -426,7 +426,7 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
ret = qm_get_vft(vf_qm, &vf_qm->qp_base);
if (ret <= 0) {
dev_err(dev, "failed to get vft qp nums\n");
- return ret;
+ return ret < 0 ? ret : -EINVAL;
}
if (ret != vf_data->qp_num) {
@@ -1188,12 +1188,34 @@ hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
return 0;
}
+/*
+ * PCI .reset_prepare handler: queue this VF's reset behind any reset already
+ * in flight on the QM referenced by pf_qm, by taking the QM_RESETTING bit in
+ * qm->misc_ctl.
+ *
+ * The bit is retried after msleep(1) for up to QM_RESET_WAIT_TIMEOUT
+ * iterations. set_reset_flag records whether *this* call acquired the bit,
+ * so that hisi_acc_vf_pci_aer_reset_done() only clears it when it is ours.
+ */
+static void hisi_acc_vf_pci_reset_prepare(struct pci_dev *pdev)
+{
+	struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
+	struct hisi_qm *qm = hisi_acc_vdev->pf_qm;
+	struct device *dev = &qm->pdev->dev;
+	u32 delay = 0;
+
+	/*
+	 * Forget ownership recorded by an earlier reset. Otherwise a timeout
+	 * below would leave a stale "true" behind and reset_done would clear
+	 * a QM_RESETTING bit that some other reset still holds.
+	 */
+	hisi_acc_vdev->set_reset_flag = false;
+
+	/* All reset requests need to be queued for processing */
+	while (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
+		msleep(1);
+		if (++delay > QM_RESET_WAIT_TIMEOUT) {
+			dev_err(dev, "reset prepare failed\n");
+			return;
+		}
+	}
+
+	hisi_acc_vdev->set_reset_flag = true;
+}
+
static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev)
{
struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
+ struct hisi_qm *qm = hisi_acc_vdev->pf_qm;
+
+ if (hisi_acc_vdev->set_reset_flag)
+ clear_bit(QM_RESETTING, &qm->misc_ctl);
- if (hisi_acc_vdev->core_device.vdev.migration_flags !=
- VFIO_MIGRATION_STOP_COPY)
+ if (!hisi_acc_vdev->core_device.vdev.mig_ops)
return;
mutex_lock(&hisi_acc_vdev->state_mutex);
@@ -1547,6 +1569,7 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
}
hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
hisi_acc_vdev->dev_opened = true;
+ hisi_acc_vdev->match_done = 0;
mutex_unlock(&hisi_acc_vdev->open_mutex);
}
@@ -1734,6 +1757,7 @@ static const struct pci_device_id hisi_acc_vfio_pci_table[] = {
MODULE_DEVICE_TABLE(pci, hisi_acc_vfio_pci_table);
static const struct pci_error_handlers hisi_acc_vf_err_handlers = {
+ .reset_prepare = hisi_acc_vf_pci_reset_prepare,
.reset_done = hisi_acc_vf_pci_aer_reset_done,
.error_detected = vfio_pci_core_aer_err_detected,
};
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
index cd55eba64dfb..a3d91a31e3d8 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -27,6 +27,7 @@
#define ERROR_CHECK_TIMEOUT 100
#define CHECK_DELAY_TIME 100
+#define QM_RESET_WAIT_TIMEOUT 60000
#define QM_SQC_VFT_BASE_SHIFT_V2 28
#define QM_SQC_VFT_BASE_MASK_V2 GENMASK(15, 0)
@@ -128,6 +129,7 @@ struct hisi_acc_vf_migration_file {
struct hisi_acc_vf_core_device {
struct vfio_pci_core_device core_device;
u8 match_done;
+ bool set_reset_flag;
/*
* io_base is only valid when dev_opened is true,
* which is protected by open_mutex.
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index b45a24d00387..fa056b69f899 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -9,6 +9,7 @@
#include <linux/jiffies.h>
#include <linux/pci-p2pdma.h>
#include <linux/pm_runtime.h>
+#include <linux/memory-failure.h>
/*
* The device memory usable to the workloads running in the VM is cached
@@ -49,6 +50,7 @@ struct mem_region {
void *memaddr;
void __iomem *ioaddr;
}; /* Base virtual address of the region */
+ struct pfn_address_space pfn_address_space;
};
struct nvgrace_gpu_pci_core_device {
@@ -88,6 +90,80 @@ nvgrace_gpu_memregion(int index,
return NULL;
}
+/*
+ * Translate an absolute PFN into a page offset inside device memory region
+ * @index of @nvdev.
+ *
+ * Returns 0 and stores the offset in @pfn_offset_in_region on success,
+ * -EINVAL when nvgrace_gpu_memregion() has no region for @index, and -EFAULT
+ * when @pfn falls outside the region's PFN range.
+ */
+static int pfn_memregion_offset(struct nvgrace_gpu_pci_core_device *nvdev,
+				unsigned int index,
+				unsigned long pfn,
+				pgoff_t *pfn_offset_in_region)
+{
+	struct mem_region *region = nvgrace_gpu_memregion(index, nvdev);
+	unsigned long first_pfn, nr_pages;
+
+	if (!region)
+		return -EINVAL;
+
+	first_pfn = PHYS_PFN(region->memphys);
+	nr_pages = region->memlength >> PAGE_SHIFT;
+
+	/* Below the first page of the region. */
+	if (pfn < first_pfn)
+		return -EFAULT;
+
+	/* At or beyond one-past-the-last page of the region. */
+	if (pfn >= first_pfn + nr_pages)
+		return -EFAULT;
+
+	*pfn_offset_in_region = pfn - first_pfn;
+	return 0;
+}
+
+static inline
+struct nvgrace_gpu_pci_core_device *vma_to_nvdev(struct vm_area_struct *vma);
+
+/*
+ * pfn_to_vma_pgoff callback installed in struct pfn_address_space: convert
+ * @pfn into the page offset, in vm_pgoff units, at which @vma maps it.
+ *
+ * vma->vm_pgoff is split into a region index (bits at and above
+ * VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT) and a page offset inside that region
+ * (the bits below).
+ *
+ * Returns 0 with *@pgoff set on success, -ENOENT if @vma is not one of this
+ * driver's mappings, the error from pfn_memregion_offset() if @pfn is not in
+ * the indexed region, or -EFAULT if @pfn lies before the start of @vma.
+ */
+static int nvgrace_gpu_pfn_to_vma_pgoff(struct vm_area_struct *vma,
+					unsigned long pfn,
+					pgoff_t *pgoff)
+{
+	struct nvgrace_gpu_pci_core_device *nvdev = vma_to_nvdev(vma);
+	pgoff_t vma_start_in_region, pfn_in_region;
+	unsigned int index;
+	int ret;
+
+	if (!nvdev)
+		return -ENOENT;
+
+	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+	vma_start_in_region = vma->vm_pgoff &
+		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+
+	ret = pfn_memregion_offset(nvdev, index, pfn, &pfn_in_region);
+	if (ret)
+		return ret;
+
+	/* Ensure PFN is not before VMA's start within the region */
+	if (pfn_in_region < vma_start_in_region)
+		return -EFAULT;
+
+	/* Calculate offset from VMA start */
+	*pgoff = vma->vm_pgoff + (pfn_in_region - vma_start_in_region);
+	return 0;
+}
+
+/*
+ * Fill in @region's embedded pfn_address_space (PFN interval, the device
+ * inode's address_space and the PFN -> pgoff callback) and hand it to
+ * register_pfn_address_space().
+ *
+ * Returns the result of register_pfn_address_space().
+ */
+static int
+nvgrace_gpu_vfio_pci_register_pfn_range(struct vfio_device *core_vdev,
+					struct mem_region *region)
+{
+	struct pfn_address_space *space = &region->pfn_address_space;
+	unsigned long first_pfn = PHYS_PFN(region->memphys);
+	unsigned long nr_pages = region->memlength >> PAGE_SHIFT;
+
+	/* node.last is the final PFN of the range, not one past it. */
+	space->node.start = first_pfn;
+	space->node.last = first_pfn + nr_pages - 1;
+	space->mapping = core_vdev->inode->i_mapping;
+	space->pfn_to_vma_pgoff = nvgrace_gpu_pfn_to_vma_pgoff;
+
+	return register_pfn_address_space(space);
+}
+
static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
{
struct vfio_pci_core_device *vdev =
@@ -114,14 +190,28 @@ static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
* memory mapping.
*/
ret = vfio_pci_core_setup_barmap(vdev, 0);
- if (ret) {
- vfio_pci_core_disable(vdev);
- return ret;
+ if (ret)
+ goto error_exit;
+
+ if (nvdev->resmem.memlength) {
+ ret = nvgrace_gpu_vfio_pci_register_pfn_range(core_vdev, &nvdev->resmem);
+ if (ret && ret != -EOPNOTSUPP)
+ goto error_exit;
}
- vfio_pci_core_finish_enable(vdev);
+ ret = nvgrace_gpu_vfio_pci_register_pfn_range(core_vdev, &nvdev->usemem);
+ if (ret && ret != -EOPNOTSUPP)
+ goto register_mem_failed;
+ vfio_pci_core_finish_enable(vdev);
return 0;
+
+register_mem_failed:
+ if (nvdev->resmem.memlength)
+ unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
+error_exit:
+ vfio_pci_core_disable(vdev);
+ return ret;
}
static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
@@ -130,6 +220,11 @@ static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
core_device.vdev);
+ if (nvdev->resmem.memlength)
+ unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
+
+ unregister_pfn_address_space(&nvdev->usemem.pfn_address_space);
+
/* Unmap the mapping to the device memory cached region */
if (nvdev->usemem.memaddr) {
memunmap(nvdev->usemem.memaddr);
@@ -247,6 +342,16 @@ static const struct vm_operations_struct nvgrace_gpu_vfio_pci_mmap_ops = {
#endif
};
+/*
+ * Return the device stored in @vma's vm_private_data, or NULL when @vma does
+ * not use nvgrace_gpu_vfio_pci_mmap_ops and therefore is not one of ours.
+ */
+static inline
+struct nvgrace_gpu_pci_core_device *vma_to_nvdev(struct vm_area_struct *vma)
+{
+	bool ours = vma->vm_ops == &nvgrace_gpu_vfio_pci_mmap_ops;
+
+	return ours ? vma->vm_private_data : NULL;
+}
+
static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
struct vm_area_struct *vma)
{
@@ -784,7 +889,7 @@ nvgrace_gpu_write(struct vfio_device *core_vdev,
static int nvgrace_get_dmabuf_phys(struct vfio_pci_core_device *core_vdev,
struct p2pdma_provider **provider,
unsigned int region_index,
- struct dma_buf_phys_vec *phys_vec,
+ struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges)
{
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 3a11e6f450f7..72c33b399800 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -588,6 +588,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_enable);
void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
{
+ struct pci_dev *bridge;
struct pci_dev *pdev = vdev->pdev;
struct vfio_pci_dummy_resource *dummy_res, *tmp;
struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
@@ -694,12 +695,20 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
* We can not use the "try" reset interface here, which will
* overwrite the previously restored configuration information.
*/
- if (vdev->reset_works && pci_dev_trylock(pdev)) {
- if (!__pci_reset_function_locked(pdev))
- vdev->needs_reset = false;
- pci_dev_unlock(pdev);
+ if (vdev->reset_works) {
+ bridge = pci_upstream_bridge(pdev);
+ if (bridge && !pci_dev_trylock(bridge))
+ goto out_restore_state;
+ if (pci_dev_trylock(pdev)) {
+ if (!__pci_reset_function_locked(pdev))
+ vdev->needs_reset = false;
+ pci_dev_unlock(pdev);
+ }
+ if (bridge)
+ pci_dev_unlock(bridge);
}
+out_restore_state:
pci_restore_state(pdev);
out:
pci_disable_device(pdev);
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index 4be4a85005cb..9918713d5774 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -14,7 +14,7 @@ struct vfio_pci_dma_buf {
struct vfio_pci_core_device *vdev;
struct list_head dmabufs_elm;
size_t size;
- struct dma_buf_phys_vec *phys_vec;
+ struct phys_vec *phys_vec;
struct p2pdma_provider *provider;
u32 nr_ranges;
u8 revoked : 1;
@@ -106,7 +106,7 @@ static const struct dma_buf_ops vfio_pci_dmabuf_ops = {
* will fail if it is currently revoked
*/
int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
- struct dma_buf_phys_vec *phys)
+ struct phys_vec *phys)
{
struct vfio_pci_dma_buf *priv;
@@ -128,7 +128,7 @@ int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
}
EXPORT_SYMBOL_FOR_MODULES(vfio_pci_dma_buf_iommufd_map, "iommufd");
-int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
+int vfio_pci_core_fill_phys_vec(struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges, phys_addr_t start,
phys_addr_t len)
@@ -160,7 +160,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_fill_phys_vec);
int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
struct p2pdma_provider **provider,
unsigned int region_index,
- struct dma_buf_phys_vec *phys_vec,
+ struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges)
{
diff --git a/include/linux/dma-buf-mapping.h b/include/linux/dma-buf-mapping.h
index a3c0ce2d3a42..09bde3f748e4 100644
--- a/include/linux/dma-buf-mapping.h
+++ b/include/linux/dma-buf-mapping.h
@@ -9,7 +9,7 @@
struct sg_table *dma_buf_phys_vec_to_sgt(struct dma_buf_attachment *attach,
struct p2pdma_provider *provider,
- struct dma_buf_phys_vec *phys_vec,
+ struct phys_vec *phys_vec,
size_t nr_ranges, size_t size,
enum dma_data_direction dir);
void dma_buf_free_sgt(struct dma_buf_attachment *attach, struct sg_table *sgt,
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 91f4939db89b..133b9e637b55 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -520,16 +520,6 @@ struct dma_buf_export_info {
};
/**
- * struct dma_buf_phys_vec - describe continuous chunk of memory
- * @paddr: physical address of that chunk
- * @len: Length of this chunk
- */
-struct dma_buf_phys_vec {
- phys_addr_t paddr;
- size_t len;
-};
-
-/**
* DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters
* @name: export-info name
*
diff --git a/include/linux/memory-failure.h b/include/linux/memory-failure.h
index 7b5e11cf905f..d333dcdbeae7 100644
--- a/include/linux/memory-failure.h
+++ b/include/linux/memory-failure.h
@@ -4,8 +4,6 @@
#include <linux/interval_tree.h>
-struct pfn_address_space;
-
struct pfn_address_space {
struct interval_tree_node node;
struct address_space *mapping;
@@ -13,7 +11,18 @@ struct pfn_address_space {
unsigned long pfn, pgoff_t *pgoff);
};
+#ifdef CONFIG_MEMORY_FAILURE
int register_pfn_address_space(struct pfn_address_space *pfn_space);
void unregister_pfn_address_space(struct pfn_address_space *pfn_space);
+#else
+/* Stub for !CONFIG_MEMORY_FAILURE builds: registration is reported as unsupported. */
+static inline int register_pfn_address_space(struct pfn_address_space *pfn_space)
+{
+ return -EOPNOTSUPP;
+}
+
+/* Stub for !CONFIG_MEMORY_FAILURE builds: nothing was registered, so nothing to undo. */
+static inline void unregister_pfn_address_space(struct pfn_address_space *pfn_space)
+{
+}
+#endif /* CONFIG_MEMORY_FAILURE */
#endif /* _LINUX_MEMORY_FAILURE_H */
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 1ac86896875c..2ebba746c18f 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -28,7 +28,6 @@
struct vfio_pci_core_device;
struct vfio_pci_region;
struct p2pdma_provider;
-struct dma_buf_phys_vec;
struct dma_buf_attachment;
struct vfio_pci_eventfd {
@@ -62,25 +61,25 @@ struct vfio_pci_device_ops {
int (*get_dmabuf_phys)(struct vfio_pci_core_device *vdev,
struct p2pdma_provider **provider,
unsigned int region_index,
- struct dma_buf_phys_vec *phys_vec,
+ struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges);
};
#if IS_ENABLED(CONFIG_VFIO_PCI_DMABUF)
-int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
+int vfio_pci_core_fill_phys_vec(struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges, phys_addr_t start,
phys_addr_t len);
int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
struct p2pdma_provider **provider,
unsigned int region_index,
- struct dma_buf_phys_vec *phys_vec,
+ struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges);
#else
static inline int
-vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
+vfio_pci_core_fill_phys_vec(struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges, phys_addr_t start,
phys_addr_t len)
@@ -89,7 +88,7 @@ vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
}
static inline int vfio_pci_core_get_dmabuf_phys(
struct vfio_pci_core_device *vdev, struct p2pdma_provider **provider,
- unsigned int region_index, struct dma_buf_phys_vec *phys_vec,
+ unsigned int region_index, struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges, size_t nr_ranges)
{
return -EOPNOTSUPP;
@@ -236,6 +235,6 @@ static inline bool is_aligned_for_order(struct vm_area_struct *vma,
}
int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
- struct dma_buf_phys_vec *phys);
+ struct phys_vec *phys);
#endif /* VFIO_PCI_CORE_H */
diff --git a/tools/testing/selftests/vfio/Makefile b/tools/testing/selftests/vfio/Makefile
index 3c796ca99a50..8e90e409e91d 100644
--- a/tools/testing/selftests/vfio/Makefile
+++ b/tools/testing/selftests/vfio/Makefile
@@ -1,5 +1,13 @@
+ARCH ?= $(shell uname -m)
+
+ifeq (,$(filter $(ARCH),arm64 x86_64))
+# Do nothing on unsupported architectures
+include ../lib.mk
+else
+
CFLAGS = $(KHDR_INCLUDES)
TEST_GEN_PROGS += vfio_dma_mapping_test
+TEST_GEN_PROGS += vfio_dma_mapping_mmio_test
TEST_GEN_PROGS += vfio_iommufd_setup_test
TEST_GEN_PROGS += vfio_pci_device_test
TEST_GEN_PROGS += vfio_pci_device_init_perf_test
@@ -27,3 +35,5 @@ TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_PROGS_O) $(LIBVFIO_O))
-include $(TEST_DEP_FILES)
EXTRA_CLEAN += $(TEST_GEN_PROGS_O) $(TEST_DEP_FILES)
+
+endif
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio.h b/tools/testing/selftests/vfio/lib/include/libvfio.h
index 279ddcd70194..1b6da54cc2cb 100644
--- a/tools/testing/selftests/vfio/lib/include/libvfio.h
+++ b/tools/testing/selftests/vfio/lib/include/libvfio.h
@@ -23,4 +23,13 @@
const char *vfio_selftests_get_bdf(int *argc, char *argv[]);
char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs);
+/*
+ * Reserve virtual address space of size at an address satisfying
+ * (vaddr % align) == offset.
+ *
+ * Returns the reserved vaddr. The caller is responsible for unmapping
+ * the returned region.
+ */
+void *mmap_reserve(size_t size, size_t align, size_t offset);
+
#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
index 5c9b9dc6d993..e9a3386a4719 100644
--- a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
@@ -61,6 +61,12 @@ iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr);
struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges);
+/* IOMMU mode name strings, shared so that tests can compare against mode->name. */
+#define MODE_VFIO_TYPE1_IOMMU "vfio_type1_iommu"
+#define MODE_VFIO_TYPE1V2_IOMMU "vfio_type1v2_iommu"
+#define MODE_IOMMUFD_COMPAT_TYPE1 "iommufd_compat_type1"
+#define MODE_IOMMUFD_COMPAT_TYPE1V2 "iommufd_compat_type1v2"
+#define MODE_IOMMUFD "iommufd"
+
/*
* Generator for VFIO selftests fixture variants that replicate across all
* possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE()
diff --git a/tools/testing/selftests/vfio/lib/iommu.c b/tools/testing/selftests/vfio/lib/iommu.c
index 58b7fb7430d4..035dac069d60 100644
--- a/tools/testing/selftests/vfio/lib/iommu.c
+++ b/tools/testing/selftests/vfio/lib/iommu.c
@@ -20,32 +20,32 @@
#include "../../../kselftest.h"
#include <libvfio.h>
-const char *default_iommu_mode = "iommufd";
+const char *default_iommu_mode = MODE_IOMMUFD;
/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
static const struct iommu_mode iommu_modes[] = {
{
- .name = "vfio_type1_iommu",
+ .name = MODE_VFIO_TYPE1_IOMMU,
.container_path = "/dev/vfio/vfio",
.iommu_type = VFIO_TYPE1_IOMMU,
},
{
- .name = "vfio_type1v2_iommu",
+ .name = MODE_VFIO_TYPE1V2_IOMMU,
.container_path = "/dev/vfio/vfio",
.iommu_type = VFIO_TYPE1v2_IOMMU,
},
{
- .name = "iommufd_compat_type1",
+ .name = MODE_IOMMUFD_COMPAT_TYPE1,
.container_path = "/dev/iommu",
.iommu_type = VFIO_TYPE1_IOMMU,
},
{
- .name = "iommufd_compat_type1v2",
+ .name = MODE_IOMMUFD_COMPAT_TYPE1V2,
.container_path = "/dev/iommu",
.iommu_type = VFIO_TYPE1v2_IOMMU,
},
{
- .name = "iommufd",
+ .name = MODE_IOMMUFD,
},
};
diff --git a/tools/testing/selftests/vfio/lib/libvfio.c b/tools/testing/selftests/vfio/lib/libvfio.c
index a23a3cc5be69..3a3d1ed635c1 100644
--- a/tools/testing/selftests/vfio/lib/libvfio.c
+++ b/tools/testing/selftests/vfio/lib/libvfio.c
@@ -2,6 +2,9 @@
#include <stdio.h>
#include <stdlib.h>
+#include <sys/mman.h>
+
+#include <linux/align.h>
#include "../../../kselftest.h"
#include <libvfio.h>
@@ -76,3 +79,25 @@ const char *vfio_selftests_get_bdf(int *argc, char *argv[])
return vfio_selftests_get_bdfs(argc, argv, &nr_bdfs)[0];
}
+
+/*
+ * Reserve @size bytes of PROT_NONE address space at a vaddr for which
+ * (vaddr % @align) == @offset, and return that vaddr. Requires @offset < @align.
+ *
+ * Works by over-allocating @size + @align bytes, picking the first suitably
+ * placed address inside the allocation, and unmapping the unused head and tail.
+ * Any failure trips a VFIO_ASSERT_*().
+ */
+void *mmap_reserve(size_t size, size_t align, size_t offset)
+{
+ void *map_base, *map_align;
+ size_t delta;
+
+ VFIO_ASSERT_GT(align, offset);
+ /* Distance from the wanted vaddr up to the next multiple of align. */
+ delta = align - offset;
+
+ map_base = mmap(NULL, size + align, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ VFIO_ASSERT_NE(map_base, MAP_FAILED);
+
+ /*
+  * map_align + delta is a multiple of align, so map_align % align == offset.
+  * The result lies in [map_base, map_base + align).
+  */
+ map_align = (void *)(ALIGN((uintptr_t)map_base + delta, align) - delta);
+
+ /* Trim the unused head [map_base, map_align), if there is one. */
+ if (map_align > map_base)
+ VFIO_ASSERT_EQ(munmap(map_base, map_align - map_base), 0);
+
+ /* Trim the unused tail; its length is always non-zero (map_align < map_base + align). */
+ VFIO_ASSERT_EQ(munmap(map_align + size, map_base + align - map_align), 0);
+
+ return map_align;
+}
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index fac4c0ecadef..4e5871f1ebc3 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -11,10 +11,14 @@
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <linux/align.h>
#include <linux/iommufd.h>
+#include <linux/kernel.h>
#include <linux/limits.h>
+#include <linux/log2.h>
#include <linux/mman.h>
#include <linux/overflow.h>
+#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/vfio.h>
@@ -123,20 +127,38 @@ static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
{
struct vfio_pci_bar *bar = &device->bars[index];
+ size_t align, size;
int prot = 0;
+ void *vaddr;
VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
VFIO_ASSERT_NULL(bar->vaddr);
VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);
+ VFIO_ASSERT_TRUE(is_power_of_2(bar->info.size));
if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
prot |= PROT_READ;
if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
prot |= PROT_WRITE;
- bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED,
+ size = bar->info.size;
+
+ /*
+ * Align BAR mmaps to improve page fault granularity during potential
+ * subsequent IOMMU mapping of these BAR vaddr. 1G for x86 is the
+ * largest hugepage size across any architecture, so no benefit from
+ * larger alignment. BARs smaller than 1G will be aligned by their
+ * power-of-two size, guaranteeing sufficient alignment for smaller
+ * hugepages, if present.
+ */
+ align = min_t(size_t, size, SZ_1G);
+
+ vaddr = mmap_reserve(size, align, 0);
+ bar->vaddr = mmap(vaddr, size, prot, MAP_SHARED | MAP_FIXED,
device->fd, bar->info.offset);
VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
+
+ madvise(bar->vaddr, size, MADV_HUGEPAGE);
}
static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_mmio_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_mmio_test.c
new file mode 100644
index 000000000000..957a89ce7b3a
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_mmio_test.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <uapi/linux/types.h>
+#include <linux/pci_regs.h>
+#include <linux/sizes.h>
+#include <linux/vfio.h>
+
+#include <libvfio.h>
+
+#include "../kselftest_harness.h"
+
+static const char *device_bdf;
+
+/*
+ * Return the biggest BAR of @device that has a userspace mapping (non-NULL
+ * vaddr) and whose region is both readable and writable, or NULL if no BAR
+ * qualifies. On equal sizes the lowest-numbered BAR wins.
+ */
+static struct vfio_pci_bar *largest_mapped_bar(struct vfio_pci_device *device)
+{
+	const u32 rw = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE;
+	struct vfio_pci_bar *best = NULL;
+	u64 best_size = 0;
+	int idx;
+
+	for (idx = 0; idx < PCI_STD_NUM_BARS; idx++) {
+		struct vfio_pci_bar *cur = &device->bars[idx];
+
+		/*
+		 * iommu_map() maps with READ|WRITE, so require the same
+		 * abilities for the underlying VFIO region.
+		 */
+		if (!cur->vaddr || (cur->info.flags & rw) != rw)
+			continue;
+
+		if (cur->info.size <= best_size)
+			continue;
+
+		best_size = cur->info.size;
+		best = cur;
+	}
+
+	return best;
+}
+
+/* Per-test state: the IOMMU, the device under test, an IOVA allocator and the BAR to map. */
+FIXTURE(vfio_dma_mapping_mmio_test) {
+ struct iommu *iommu;
+ struct vfio_pci_device *device;
+ struct iova_allocator *iova_allocator;
+ struct vfio_pci_bar *bar;
+};
+
+/* Each variant selects one IOMMU mode by name. */
+FIXTURE_VARIANT(vfio_dma_mapping_mmio_test) {
+ const char *iommu_mode;
+};
+
+/* Hook used by FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(): the mode token is stringified into iommu_mode. */
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode) \
+FIXTURE_VARIANT_ADD(vfio_dma_mapping_mmio_test, _iommu_mode) { \
+ .iommu_mode = #_iommu_mode, \
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
+
+#undef FIXTURE_VARIANT_ADD_IOMMU_MODE
+
+/*
+ * Bring up the IOMMU in the variant's mode, open the device given on the
+ * command line, create an IOVA allocator and pick the BAR to test with.
+ * The test is skipped when largest_mapped_bar() finds no suitable BAR.
+ */
+FIXTURE_SETUP(vfio_dma_mapping_mmio_test)
+{
+ self->iommu = iommu_init(variant->iommu_mode);
+ self->device = vfio_pci_device_init(device_bdf, self->iommu);
+ self->iova_allocator = iova_allocator_init(self->iommu);
+ self->bar = largest_mapped_bar(self->device);
+
+ if (!self->bar)
+ SKIP(return, "No mappable BAR found on device %s", device_bdf);
+}
+
+/* Release the fixture's resources in the reverse order of FIXTURE_SETUP(). */
+FIXTURE_TEARDOWN(vfio_dma_mapping_mmio_test)
+{
+ iova_allocator_cleanup(self->iova_allocator);
+ vfio_pci_device_cleanup(self->device);
+ iommu_cleanup(self->iommu);
+}
+
+/*
+ * Try to IOMMU-map and unmap @size bytes of BAR mmap at @vaddr, using an IOVA
+ * taken from @iova_allocator.
+ *
+ * In the two legacy VFIO type1 modes the map and unmap go through the
+ * iommu_map()/iommu_unmap() helpers. In every other mode both __iommu_map()
+ * and __iommu_unmap() are asserted to return non-zero.
+ */
+static void do_mmio_map_test(struct iommu *iommu,
+			     struct iova_allocator *iova_allocator,
+			     void *vaddr, size_t size)
+{
+	struct dma_region region = {
+		.vaddr = vaddr,
+		.size = size,
+		.iova = iova_allocator_alloc(iova_allocator, size),
+	};
+	const char *mode = iommu->mode->name;
+	bool legacy_type1;
+
+	legacy_type1 = strcmp(mode, MODE_VFIO_TYPE1V2_IOMMU) == 0 ||
+		       strcmp(mode, MODE_VFIO_TYPE1_IOMMU) == 0;
+
+	/*
+	 * NOTE: Check for iommufd compat success once it lands. Native iommufd
+	 * will never support this.
+	 */
+	if (!legacy_type1) {
+		VFIO_ASSERT_NE(__iommu_map(iommu, &region), 0);
+		VFIO_ASSERT_NE(__iommu_unmap(iommu, &region, NULL), 0);
+		return;
+	}
+
+	iommu_map(iommu, &region);
+	iommu_unmap(iommu, &region);
+}
+
+/* IOMMU-map the selected BAR's existing mmap over its full info.size. */
+TEST_F(vfio_dma_mapping_mmio_test, map_full_bar)
+{
+ do_mmio_map_test(self->iommu, self->iova_allocator,
+ self->bar->vaddr, self->bar->info.size);
+}
+
+/*
+ * IOMMU-map only the first page of the selected BAR's mmap. Skipped when the
+ * BAR is smaller than two pages, since one page would then not be a strict
+ * subset of it.
+ */
+TEST_F(vfio_dma_mapping_mmio_test, map_partial_bar)
+{
+ if (self->bar->info.size < 2 * getpagesize())
+ SKIP(return, "BAR too small (size=0x%llx)", self->bar->info.size);
+
+ do_mmio_map_test(self->iommu, self->iova_allocator,
+ self->bar->vaddr, getpagesize());
+}
+
+/*
+ * Test IOMMU mapping of BAR mmap with intentionally poor vaddr alignment.
+ *
+ * A fresh mapping of the BAR is placed one page past a 1G boundary
+ * (vaddr % SZ_1G == page size), run through do_mmio_map_test() and then
+ * unmapped again. The fixture's own BAR mapping is left untouched.
+ */
+TEST_F(vfio_dma_mapping_mmio_test, map_bar_misaligned)
+{
+ /* Limit size to bound test time for large BARs */
+ size_t size = min_t(size_t, self->bar->info.size, SZ_1G);
+ void *vaddr;
+
+ /* Reserve the address range first, then overlay the BAR on it with MAP_FIXED. */
+ vaddr = mmap_reserve(size, SZ_1G, getpagesize());
+ vaddr = mmap(vaddr, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED,
+ self->device->fd, self->bar->info.offset);
+ VFIO_ASSERT_NE(vaddr, MAP_FAILED);
+
+ do_mmio_map_test(self->iommu, self->iova_allocator, vaddr, size);
+
+ VFIO_ASSERT_EQ(munmap(vaddr, size), 0);
+}
+
+/*
+ * Obtain the device BDF via vfio_selftests_get_bdf() and then run the harness.
+ * NOTE(review): argc is passed by pointer, so the helper presumably strips
+ * the BDF from argv before the harness parses it - confirm in libvfio.c.
+ */
+int main(int argc, char *argv[])
+{
+ device_bdf = vfio_selftests_get_bdf(&argc, argv);
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
index 3bf984b337ac..abb170bdcef7 100644
--- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -161,12 +161,8 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
if (rc == -EOPNOTSUPP)
goto unmap;
- /*
- * IOMMUFD compatibility-mode does not support huge mappings when
- * using VFIO_TYPE1_IOMMU.
- */
- if (!strcmp(variant->iommu_mode, "iommufd_compat_type1"))
- mapping_size = SZ_4K;
+ if (self->iommu->mode->iommu_type == VFIO_TYPE1_IOMMU)
+ goto unmap;
ASSERT_EQ(0, rc);
printf("Found IOMMU mappings for IOVA 0x%lx:\n", region.iova);