From ae03c3771b8cbbed3802ad1153d896c32015c520 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 5 Aug 2021 22:18:59 -0300 Subject: vfio: Introduce a vfio_uninit_group_dev() API call This pairs with vfio_init_group_dev() and allows undoing any state that is stored in the vfio_device unrelated to registration. Add appropriately placed calls to all the drivers. The following patch will use this to add pre-registration state for the device set. Signed-off-by: Max Gurtovoy Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index a2c5b30e1763..b0875cf8e496 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -61,6 +61,7 @@ extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); void vfio_init_group_dev(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops); +void vfio_uninit_group_dev(struct vfio_device *device); int vfio_register_group_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device); extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); -- cgit v1.2.3 From 2fd585f4ed9de9b9259e95affdd7d8cde06b48c3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:19:00 -0300 Subject: vfio: Provide better generic support for open/release vfio_device_ops Currently the driver ops have an open/release pair that is called once each time a device FD is opened or closed. Add an additional set of open/close_device() ops which are called when the device FD is opened for the first time and closed for the last time. An analysis shows that all of the drivers require this semantic. Some are open coding it as part of their reflck implementation, and some are just buggy and miss it completely. To retain the current semantics PCI and FSL depend on, introduce the idea of a "device set" which is a grouping of vfio_device's that share the same lock around opening. The device set is established by providing a 'set_id' pointer. All vfio_device's that provide the same pointer will be joined to the same singleton memory and lock across the whole set. This effectively replaces the oddly named reflck. After conversion the set_id will be sourced from: - A struct device from a fsl_mc_device (fsl) - A struct pci_slot (pci) - A struct pci_bus (pci) - The struct vfio_device (everything) The design ensures that the above pointers are live as long as the vfio_device is registered, so they form reliable unique keys to group vfio_devices into sets. This implementation uses xarray instead of searching through the driver core structures, which simplifies the somewhat tricky locking in this area. Following patches convert all the drivers. Signed-off-by: Yishai Hadas Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/mdev/vfio_mdev.c | 26 +++++++- drivers/vfio/vfio.c | 149 +++++++++++++++++++++++++++++++++++------- include/linux/mdev.h | 2 + include/linux/vfio.h | 21 ++++++ 4 files changed, 174 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index a5c77ccb24f7..e12196ffd487 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -17,13 +17,33 @@ #include "mdev_private.h" +static int vfio_mdev_open_device(struct vfio_device *core_vdev) +{ + struct mdev_device *mdev = to_mdev_device(core_vdev->dev); + struct mdev_parent *parent = mdev->type->parent; + + if (unlikely(!parent->ops->open_device)) + return 0; + + return parent->ops->open_device(mdev); +} + +static void vfio_mdev_close_device(struct vfio_device *core_vdev) +{ + struct mdev_device *mdev = to_mdev_device(core_vdev->dev); + struct mdev_parent *parent = mdev->type->parent; + + if (likely(parent->ops->close_device)) + parent->ops->close_device(mdev); +} + static int vfio_mdev_open(struct vfio_device *core_vdev) { struct mdev_device *mdev = to_mdev_device(core_vdev->dev); struct mdev_parent *parent = mdev->type->parent; if (unlikely(!parent->ops->open)) - return -EINVAL; + return 0; return parent->ops->open(mdev); } @@ -44,7 +64,7 @@ static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev, struct mdev_parent *parent = mdev->type->parent; if (unlikely(!parent->ops->ioctl)) - return -EINVAL; + return 0; return parent->ops->ioctl(mdev, cmd, arg); } @@ -100,6 +120,8 @@ static void vfio_mdev_request(struct vfio_device *core_vdev, unsigned int count) static const struct vfio_device_ops vfio_mdev_dev_ops = { .name = "vfio-mdev", + .open_device = vfio_mdev_open_device, + .close_device = vfio_mdev_close_device, .open = vfio_mdev_open, .release = vfio_mdev_release, .ioctl = vfio_mdev_unlocked_ioctl, diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index cc375df0fd5d..9cc17768c425 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -96,6 +96,79 @@ module_param_named(enable_unsafe_noiommu_mode, MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)"); #endif +static DEFINE_XARRAY(vfio_device_set_xa); + +int vfio_assign_device_set(struct vfio_device *device, void *set_id) +{ + unsigned long idx = (unsigned long)set_id; + struct vfio_device_set *new_dev_set; + struct vfio_device_set *dev_set; + + if (WARN_ON(!set_id)) + return -EINVAL; + + /* + * Atomically acquire a singleton object in the xarray for this set_id + */ + xa_lock(&vfio_device_set_xa); + dev_set = xa_load(&vfio_device_set_xa, idx); + if (dev_set) + goto found_get_ref; + xa_unlock(&vfio_device_set_xa); + + new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL); + if (!new_dev_set) + return -ENOMEM; + mutex_init(&new_dev_set->lock); + INIT_LIST_HEAD(&new_dev_set->device_list); + new_dev_set->set_id = set_id; + + xa_lock(&vfio_device_set_xa); + dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set, + GFP_KERNEL); + if (!dev_set) { + dev_set = new_dev_set; + goto found_get_ref; + } + + kfree(new_dev_set); + if (xa_is_err(dev_set)) { + xa_unlock(&vfio_device_set_xa); + return xa_err(dev_set); + } + +found_get_ref: + dev_set->device_count++; + xa_unlock(&vfio_device_set_xa); + mutex_lock(&dev_set->lock); + device->dev_set = dev_set; + list_add_tail(&device->dev_set_list, &dev_set->device_list); + mutex_unlock(&dev_set->lock); + return 0; +} +EXPORT_SYMBOL_GPL(vfio_assign_device_set); + +static void vfio_release_device_set(struct vfio_device *device) +{ + struct vfio_device_set *dev_set = device->dev_set; + + if (!dev_set) + return; + + mutex_lock(&dev_set->lock); + list_del(&device->dev_set_list); + mutex_unlock(&dev_set->lock); + + xa_lock(&vfio_device_set_xa); + if (!--dev_set->device_count) { + __xa_erase(&vfio_device_set_xa, + (unsigned long)dev_set->set_id); + mutex_destroy(&dev_set->lock); + kfree(dev_set); + } + xa_unlock(&vfio_device_set_xa); +} + /* * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe * and remove functions, any use cases other than acquiring the first @@ -751,6 +824,7 @@ EXPORT_SYMBOL_GPL(vfio_init_group_dev); void vfio_uninit_group_dev(struct vfio_device *device) { + vfio_release_device_set(device); } EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); @@ -760,6 +834,13 @@ int vfio_register_group_dev(struct vfio_device *device) struct iommu_group *iommu_group; struct vfio_group *group; + /* + * If the driver doesn't specify a set then the device is added to a + * singleton set just for itself. + */ + if (!device->dev_set) + vfio_assign_device_set(device, device); + iommu_group = iommu_group_get(device->dev); if (!iommu_group) return -EINVAL; @@ -1361,7 +1442,8 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) { struct vfio_device *device; struct file *filep; - int ret; + int fdno; + int ret = 0; if (0 == atomic_read(&group->container_users) || !group->container->iommu_driver || !vfio_group_viable(group)) @@ -1375,38 +1457,38 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) return PTR_ERR(device); if (!try_module_get(device->dev->driver->owner)) { - vfio_device_put(device); - return -ENODEV; + ret = -ENODEV; + goto err_device_put; } - ret = device->ops->open(device); - if (ret) { - module_put(device->dev->driver->owner); - vfio_device_put(device); - return ret; + mutex_lock(&device->dev_set->lock); + device->open_count++; + if (device->open_count == 1 && device->ops->open_device) { + ret = device->ops->open_device(device); + if (ret) + goto err_undo_count; + } + mutex_unlock(&device->dev_set->lock); + + if (device->ops->open) { + ret = device->ops->open(device); + if (ret) + goto err_close_device; } /* * We can't use anon_inode_getfd() because we need to modify * the f_mode flags directly to allow more than just ioctls */ - ret = get_unused_fd_flags(O_CLOEXEC); - if (ret < 0) { - device->ops->release(device); - module_put(device->dev->driver->owner); - vfio_device_put(device); - return ret; - } + fdno = ret = get_unused_fd_flags(O_CLOEXEC); + if (ret < 0) + goto err_release; filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, device, O_RDWR); if (IS_ERR(filep)) { - put_unused_fd(ret); ret = PTR_ERR(filep); - device->ops->release(device); - module_put(device->dev->driver->owner); - vfio_device_put(device); - return ret; + goto err_fd; } /* @@ -1418,12 +1500,28 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) atomic_inc(&group->container_users); - fd_install(ret, filep); + fd_install(fdno, filep); if (group->noiommu) dev_warn(device->dev, "vfio-noiommu device opened by user " "(%s:%d)\n", current->comm, task_pid_nr(current)); + return fdno; +err_fd: + put_unused_fd(fdno); +err_release: + if (device->ops->release) + device->ops->release(device); +err_close_device: + mutex_lock(&device->dev_set->lock); + if (device->open_count == 1 && device->ops->close_device) + device->ops->close_device(device); +err_undo_count: + device->open_count--; + mutex_unlock(&device->dev_set->lock); + module_put(device->dev->driver->owner); +err_device_put: + vfio_device_put(device); return ret; } @@ -1561,7 +1659,13 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device *device = filep->private_data; - device->ops->release(device); + if (device->ops->release) + device->ops->release(device); + + mutex_lock(&device->dev_set->lock); + if (!--device->open_count && device->ops->close_device) + device->ops->close_device(device); + mutex_unlock(&device->dev_set->lock); module_put(device->dev->driver->owner); @@ -2364,6 +2468,7 @@ static void __exit vfio_cleanup(void) class_destroy(vfio.class); vfio.class = NULL; misc_deregister(&vfio_dev); + xa_destroy(&vfio_device_set_xa); } module_init(vfio_init); diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 3a38598c2605..cb5b7ed1d7c3 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -111,6 +111,8 @@ struct mdev_parent_ops { int (*create)(struct mdev_device *mdev); int (*remove)(struct mdev_device *mdev); + int (*open_device)(struct mdev_device *mdev); + void (*close_device)(struct mdev_device *mdev); int (*open)(struct mdev_device *mdev); void (*release)(struct mdev_device *mdev); ssize_t (*read)(struct mdev_device *mdev, char __user *buf, diff --git a/include/linux/vfio.h b/include/linux/vfio.h index b0875cf8e496..f0e6a72875e4 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -15,13 +15,28 @@ #include #include +/* + * VFIO devices can be placed in a set, this allows all devices to share this + * structure and the VFIO core will provide a lock that is held around + * open_device()/close_device() for all devices in the set. + */ +struct vfio_device_set { + void *set_id; + struct mutex lock; + struct list_head device_list; + unsigned int device_count; +}; + struct vfio_device { struct device *dev; const struct vfio_device_ops *ops; struct vfio_group *group; + struct vfio_device_set *dev_set; + struct list_head dev_set_list; /* Members below here are private, not for driver use */ refcount_t refcount; + unsigned int open_count; struct completion comp; struct list_head group_next; }; @@ -29,6 +44,8 @@ struct vfio_device { /** * struct vfio_device_ops - VFIO bus driver device callbacks * + * @open_device: Called when the first file descriptor is opened for this device + * @close_device: Opposite of open_device * @open: Called when userspace creates new file descriptor for device * @release: Called when userspace releases file descriptor for device * @read: Perform read(2) on device file descriptor @@ -43,6 +60,8 @@ struct vfio_device { */ struct vfio_device_ops { char *name; + int (*open_device)(struct vfio_device *vdev); + void (*close_device)(struct vfio_device *vdev); int (*open)(struct vfio_device *vdev); void (*release)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -67,6 +86,8 @@ void vfio_unregister_group_dev(struct vfio_device *device); extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); extern void vfio_device_put(struct vfio_device *device); +int vfio_assign_device_set(struct vfio_device *device, void *set_id); + /* events for the backend driver notify callback */ enum vfio_iommu_notify_type { VFIO_IOMMU_CONTAINER_CLOSE = 0, -- cgit v1.2.3 From eb24c1007e6852e024dc33b0dd9617b8500a1291 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:19:10 -0300 Subject: vfio: Remove struct vfio_device_ops open/release Nothing uses this anymore, delete it. Signed-off-by: Yishai Hadas Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/14-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/mdev/vfio_mdev.c | 22 ---------------------- drivers/vfio/vfio.c | 14 +------------- include/linux/mdev.h | 7 ------- include/linux/vfio.h | 4 ---- 4 files changed, 1 insertion(+), 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index e12196ffd487..7a9883048216 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -37,26 +37,6 @@ static void vfio_mdev_close_device(struct vfio_device *core_vdev) parent->ops->close_device(mdev); } -static int vfio_mdev_open(struct vfio_device *core_vdev) -{ - struct mdev_device *mdev = to_mdev_device(core_vdev->dev); - struct mdev_parent *parent = mdev->type->parent; - - if (unlikely(!parent->ops->open)) - return 0; - - return parent->ops->open(mdev); -} - -static void vfio_mdev_release(struct vfio_device *core_vdev) -{ - struct mdev_device *mdev = to_mdev_device(core_vdev->dev); - struct mdev_parent *parent = mdev->type->parent; - - if (likely(parent->ops->release)) - parent->ops->release(mdev); -} - static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg) { @@ -122,8 +102,6 @@ static const struct vfio_device_ops vfio_mdev_dev_ops = { .name = "vfio-mdev", .open_device = vfio_mdev_open_device, .close_device = vfio_mdev_close_device, - .open = vfio_mdev_open, - .release = vfio_mdev_release, .ioctl = vfio_mdev_unlocked_ioctl, .read = vfio_mdev_read, .write = vfio_mdev_write, diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 9cc17768c425..3c034fe14ccb 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1470,19 +1470,13 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) } mutex_unlock(&device->dev_set->lock); - if (device->ops->open) { - ret = device->ops->open(device); - if (ret) - goto err_close_device; - } - /* * We can't use anon_inode_getfd() because we need to modify * the f_mode flags directly to allow more than just ioctls */ fdno = ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) - goto err_release; + goto err_close_device; filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, device, O_RDWR); @@ -1509,9 +1503,6 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) err_fd: put_unused_fd(fdno); -err_release: - if (device->ops->release) - device->ops->release(device); err_close_device: mutex_lock(&device->dev_set->lock); if (device->open_count == 1 && device->ops->close_device) @@ -1659,9 +1650,6 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device *device = filep->private_data; - if (device->ops->release) - device->ops->release(device); - mutex_lock(&device->dev_set->lock); if (!--device->open_count && device->ops->close_device) device->ops->close_device(device); diff --git a/include/linux/mdev.h b/include/linux/mdev.h index cb5b7ed1d7c3..68427e8fadeb 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -72,11 +72,6 @@ struct device *mtype_get_parent_dev(struct mdev_type *mtype); * @mdev: mdev_device device structure which is being * destroyed * Returns integer: success (0) or error (< 0) - * @open: Open mediated device. - * @mdev: mediated device. - * Returns integer: success (0) or error (< 0) - * @release: release mediated device - * @mdev: mediated device. * @read: Read emulation callback * @mdev: mediated device structure * @buf: read buffer @@ -113,8 +108,6 @@ struct mdev_parent_ops { int (*remove)(struct mdev_device *mdev); int (*open_device)(struct mdev_device *mdev); void (*close_device)(struct mdev_device *mdev); - int (*open)(struct mdev_device *mdev); - void (*release)(struct mdev_device *mdev); ssize_t (*read)(struct mdev_device *mdev, char __user *buf, size_t count, loff_t *ppos); ssize_t (*write)(struct mdev_device *mdev, const char __user *buf, diff --git a/include/linux/vfio.h b/include/linux/vfio.h index f0e6a72875e4..b53a9557884a 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -46,8 +46,6 @@ struct vfio_device { * * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device - * @open: Called when userspace creates new file descriptor for device - * @release: Called when userspace releases file descriptor for device * @read: Perform read(2) on device file descriptor * @write: Perform write(2) on device file descriptor * @ioctl: Perform ioctl(2) on device file descriptor, supporting VFIO_DEVICE_* @@ -62,8 +60,6 @@ struct vfio_device_ops { char *name; int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); - int (*open)(struct vfio_device *vdev); - void (*release)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, size_t count, loff_t *ppos); ssize_t (*write)(struct vfio_device *vdev, const char __user *buf, -- cgit v1.2.3 From 343b7258687ecfbb363bfda8833a7cf641aac524 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:08 +0300 Subject: PCI: Add 'override_only' field to struct pci_device_id Add 'override_only' field to struct pci_device_id to be used as part of pci_match_device(). When set, a driver only matches the entry when dev->driver_override is set to that driver. In addition, add a helper macro named 'PCI_DEVICE_DRIVER_OVERRIDE' to enable setting some data on it. Next patch from this series will use the above functionality. Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-10-yishaih@nvidia.com Signed-off-by: Alex Williamson --- Documentation/PCI/pci.rst | 1 + drivers/pci/pci-driver.c | 28 +++++++++++++++++++++------- include/linux/mod_devicetable.h | 2 ++ include/linux/pci.h | 15 +++++++++++++++ 4 files changed, 39 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/PCI/pci.rst b/Documentation/PCI/pci.rst index fa651e25d98c..87c6f4a6ca32 100644 --- a/Documentation/PCI/pci.rst +++ b/Documentation/PCI/pci.rst @@ -103,6 +103,7 @@ need pass only as many optional fields as necessary: - subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF) - class and classmask fields default to 0 - driver_data defaults to 0UL. + - override_only field defaults to 0. Note that driver_data must match the value used by any of the pci_device_id entries defined in the driver. This makes the driver_data field mandatory diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 3a72352aa5cf..123c590ebe1d 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -136,7 +136,7 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, struct pci_dev *dev) { struct pci_dynid *dynid; - const struct pci_device_id *found_id = NULL; + const struct pci_device_id *found_id = NULL, *ids; /* When driver_override is set, only bind to the matching driver */ if (dev->driver_override && strcmp(dev->driver_override, drv->name)) @@ -152,14 +152,28 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, } spin_unlock(&drv->dynids.lock); - if (!found_id) - found_id = pci_match_id(drv->id_table, dev); + if (found_id) + return found_id; - /* driver_override will always match, send a dummy id */ - if (!found_id && dev->driver_override) - found_id = &pci_device_id_any; + for (ids = drv->id_table; (found_id = pci_match_id(ids, dev)); + ids = found_id + 1) { + /* + * The match table is split based on driver_override. + * In case override_only was set, enforce driver_override + * matching. + */ + if (found_id->override_only) { + if (dev->driver_override) + return found_id; + } else { + return found_id; + } + } - return found_id; + /* driver_override will always match, send a dummy id */ + if (dev->driver_override) + return &pci_device_id_any; + return NULL; } /** diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8e291cfdaf06..2e3ba6d9ece0 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -34,12 +34,14 @@ typedef unsigned long kernel_ulong_t; * Best practice is to use driver_data as an index * into a static list of equivalent device types, * instead of using it as a pointer. + * @override_only: Match only when dev->driver_override is this driver. */ struct pci_device_id { __u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/ __u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */ __u32 class, class_mask; /* (class,subclass,prog-if) triplet */ kernel_ulong_t driver_data; /* Data private to the driver */ + __u32 override_only; }; diff --git a/include/linux/pci.h b/include/linux/pci.h index 540b377ca8f6..0506b1a8c921 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -901,6 +901,21 @@ struct pci_driver { .vendor = (vend), .device = (dev), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID +/** + * PCI_DEVICE_DRIVER_OVERRIDE - macro used to describe a PCI device with + * override_only flags. + * @vend: the 16 bit PCI Vendor ID + * @dev: the 16 bit PCI Device ID + * @driver_override: the 32 bit PCI Device override_only + * + * This macro is used to create a struct pci_device_id that matches only a + * driver_override device. The subvendor and subdevice fields will be set to + * PCI_ANY_ID. + */ +#define PCI_DEVICE_DRIVER_OVERRIDE(vend, dev, driver_override) \ + .vendor = (vend), .device = (dev), .subvendor = PCI_ANY_ID, \ + .subdevice = PCI_ANY_ID, .override_only = (driver_override) + /** * PCI_DEVICE_SUB - macro used to describe a specific PCI device with subsystem * @vend: the 16 bit PCI Vendor ID -- cgit v1.2.3 From cc6711b0bf36de068b10490198d05ac168377989 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:09 +0300 Subject: PCI / VFIO: Add 'override_only' support for VFIO PCI sub system Expose an 'override_only' helper macro (i.e. PCI_DRIVER_OVERRIDE_DEVICE_VFIO) for VFIO PCI sub system and add the required code to prefix its matching entries with "vfio_" in modules.alias file. It allows VFIO device drivers to include match entries in the modules.alias file produced by kbuild that are not used for normal driver autoprobing and module autoloading. Drivers using these match entries can be connected to the PCI device manually, by userspace, using the existing driver_override sysfs. For example the resulting modules.alias may have: alias pci:v000015B3d00001021sv*sd*bc*sc*i* mlx5_core alias vfio_pci:v000015B3d00001021sv*sd*bc*sc*i* mlx5_vfio_pci alias vfio_pci:v*d*sv*sd*bc*sc*i* vfio_pci In this example mlx5_core and mlx5_vfio_pci match to the same PCI device. The kernel will autoload and autobind to mlx5_core but the kernel and udev mechanisms will ignore mlx5_vfio_pci. When userspace wants to change a device to the VFIO subsystem it can implement a generic algorithm: 1) Identify the sysfs path to the device: /sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0 2) Get the modalias string from the kernel: $ cat /sys/bus/pci/devices/0000:01:00.0/modalias pci:v000015B3d00001021sv000015B3sd00000001bc02sc00i00 3) Prefix it with vfio_: vfio_pci:v000015B3d00001021sv000015B3sd00000001bc02sc00i00 4) Search modules.alias for the above string and select the entry that has the fewest *'s: alias vfio_pci:v000015B3d00001021sv*sd*bc*sc*i* mlx5_vfio_pci 5) modprobe the matched module name: $ modprobe mlx5_vfio_pci 6) cat the matched module name to driver_override: echo mlx5_vfio_pci > /sys/bus/pci/devices/0000:01:00.0/driver_override 7) unbind device from original module echo 0000:01:00.0 > /sys/bus/pci/devices/0000:01:00.0/driver/unbind 8) probe PCI drivers (or explicitly bind to mlx5_vfio_pci) echo 0000:01:00.0 > /sys/bus/pci/drivers_probe The algorithm is independent of bus type. In future the other buses with VFIO device drivers, like platform and ACPI, can use this algorithm as well. This patch is the infrastructure to provide the information in the modules.alias to userspace. Convert the only VFIO pci_driver which results in one new line in the modules.alias: alias vfio_pci:v*d*sv*sd*bc*sc*i* vfio_pci Later series introduce additional HW specific VFIO PCI drivers, such as mlx5_vfio_pci. Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe Acked-by: Bjorn Helgaas # for pci.h Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-11-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 9 ++++++++- include/linux/mod_devicetable.h | 4 ++++ include/linux/pci.h | 14 ++++++++++++++ scripts/mod/devicetable-offsets.c | 1 + scripts/mod/file2alias.c | 17 +++++++++++++++-- 5 files changed, 42 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 163e560c4495..85fd638a5955 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -178,9 +178,16 @@ static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) return vfio_pci_core_sriov_configure(pdev, nr_virtfn); } +static const struct pci_device_id vfio_pci_table[] = { + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_ANY_ID, PCI_ANY_ID) }, /* match all by default */ + {} +}; + +MODULE_DEVICE_TABLE(pci, vfio_pci_table); + static struct pci_driver vfio_pci_driver = { .name = "vfio-pci", - .id_table = NULL, /* only dynamic ids */ + .id_table = vfio_pci_table, .probe = vfio_pci_probe, .remove = vfio_pci_remove, .sriov_configure = vfio_pci_sriov_configure, diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 2e3ba6d9ece0..ae2e75d15b21 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -16,6 +16,10 @@ typedef unsigned long kernel_ulong_t; #define PCI_ANY_ID (~0) +enum { + PCI_ID_F_VFIO_DRIVER_OVERRIDE = 1, +}; + /** * struct pci_device_id - PCI device ID structure * @vendor: Vendor ID to match (or PCI_ANY_ID) diff --git a/include/linux/pci.h b/include/linux/pci.h index 0506b1a8c921..527a1dfd1d06 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -916,6 +916,20 @@ struct pci_driver { .vendor = (vend), .device = (dev), .subvendor = PCI_ANY_ID, \ .subdevice = PCI_ANY_ID, .override_only = (driver_override) +/** + * PCI_DRIVER_OVERRIDE_DEVICE_VFIO - macro used to describe a VFIO + * "driver_override" PCI device. + * @vend: the 16 bit PCI Vendor ID + * @dev: the 16 bit PCI Device ID + * + * This macro is used to create a struct pci_device_id that matches a + * specific device. The subvendor and subdevice fields will be set to + * PCI_ANY_ID and the driver_override will be set to + * PCI_ID_F_VFIO_DRIVER_OVERRIDE. + */ +#define PCI_DRIVER_OVERRIDE_DEVICE_VFIO(vend, dev) \ + PCI_DEVICE_DRIVER_OVERRIDE(vend, dev, PCI_ID_F_VFIO_DRIVER_OVERRIDE) + /** * PCI_DEVICE_SUB - macro used to describe a specific PCI device with subsystem * @vend: the 16 bit PCI Vendor ID diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index 9bb6c7edccc4..cc3625617a0e 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -42,6 +42,7 @@ int main(void) DEVID_FIELD(pci_device_id, subdevice); DEVID_FIELD(pci_device_id, class); DEVID_FIELD(pci_device_id, class_mask); + DEVID_FIELD(pci_device_id, override_only); DEVID(ccw_device_id); DEVID_FIELD(ccw_device_id, match_flags); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 7c97fa8e36bc..49aba862073e 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -426,7 +426,7 @@ static int do_ieee1394_entry(const char *filename, return 1; } -/* Looks like: pci:vNdNsvNsdNbcNscNiN. */ +/* Looks like: pci:vNdNsvNsdNbcNscNiN or _pci:vNdNsvNsdNbcNscNiN. */ static int do_pci_entry(const char *filename, void *symval, char *alias) { @@ -440,8 +440,21 @@ static int do_pci_entry(const char *filename, DEF_FIELD(symval, pci_device_id, subdevice); DEF_FIELD(symval, pci_device_id, class); DEF_FIELD(symval, pci_device_id, class_mask); + DEF_FIELD(symval, pci_device_id, override_only); + + switch (override_only) { + case 0: + strcpy(alias, "pci:"); + break; + case PCI_ID_F_VFIO_DRIVER_OVERRIDE: + strcpy(alias, "vfio_pci:"); + break; + default: + warn("Unknown PCI driver_override alias %08X\n", + override_only); + return 0; + } - strcpy(alias, "pci:"); ADD(alias, "v", vendor != PCI_ANY_ID, vendor); ADD(alias, "d", device != PCI_ANY_ID, device); ADD(alias, "sv", subvendor != PCI_ANY_ID, subvendor); -- cgit v1.2.3 From 7fa005caa35ed92563b9e9d88d319b2623763a77 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:12 +0300 Subject: vfio/pci: Introduce vfio_pci_core.ko Now that vfio_pci has been split into two source modules, one focusing on the "struct pci_driver" (vfio_pci.c) and a toolbox library of code (vfio_pci_core.c), complete the split and move them into two different kernel modules. As before vfio_pci.ko continues to present the same interface under sysfs and this change will have no functional impact. Splitting into another module and adding exports allows creating new HW specific VFIO PCI drivers that can implement device specific functionality, such as VFIO migration interfaces or specialized device requirements. Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe Reviewed-by: Christoph Hellwig Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-14-yishaih@nvidia.com Signed-off-by: Alex Williamson --- MAINTAINERS | 1 + drivers/vfio/pci/Kconfig | 33 ++--- drivers/vfio/pci/Makefile | 8 +- drivers/vfio/pci/vfio_pci.c | 14 +-- drivers/vfio/pci/vfio_pci_config.c | 2 +- drivers/vfio/pci/vfio_pci_core.c | 39 +++++- drivers/vfio/pci/vfio_pci_core.h | 241 ------------------------------------- drivers/vfio/pci/vfio_pci_igd.c | 2 +- drivers/vfio/pci/vfio_pci_intrs.c | 2 +- drivers/vfio/pci/vfio_pci_rdwr.c | 2 +- drivers/vfio/pci/vfio_pci_zdev.c | 2 +- include/linux/vfio_pci_core.h | 239 ++++++++++++++++++++++++++++++++++++ 12 files changed, 304 insertions(+), 281 deletions(-) delete mode 100644 drivers/vfio/pci/vfio_pci_core.h create mode 100644 include/linux/vfio_pci_core.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index c9467d2839f5..7f0fcaa8ee67 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19466,6 +19466,7 @@ T: git git://github.com/awilliam/linux-vfio.git F: Documentation/driver-api/vfio.rst F: drivers/vfio/ F: include/linux/vfio.h +F: include/linux/vfio_pci_core.h F: include/uapi/linux/vfio.h VFIO FSL-MC DRIVER diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index afdab7d71e98..860424ccda1b 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -1,19 +1,28 @@ # SPDX-License-Identifier: GPL-2.0-only -config VFIO_PCI - tristate "VFIO support for PCI devices" - depends on PCI - depends on MMU +if PCI && MMU +config VFIO_PCI_CORE + tristate select VFIO_VIRQFD select IRQ_BYPASS_MANAGER + +config VFIO_PCI_MMAP + def_bool y if !S390 + +config VFIO_PCI_INTX + def_bool y if !S390 + +config VFIO_PCI + tristate "Generic VFIO support for any PCI device" + select VFIO_PCI_CORE help - Support for the PCI VFIO bus driver. This is required to make - use of PCI drivers using the VFIO framework. + Support for the generic PCI VFIO bus driver which can connect any + PCI device to the VFIO framework. If you don't know what to do here, say N. if VFIO_PCI config VFIO_PCI_VGA - bool "VFIO PCI support for VGA devices" + bool "Generic VFIO PCI support for VGA devices" depends on X86 && VGA_ARB help Support for VGA extension to VFIO PCI. This exposes an additional @@ -22,14 +31,8 @@ config VFIO_PCI_VGA If you don't know what to do here, say N. -config VFIO_PCI_MMAP - def_bool y if !S390 - -config VFIO_PCI_INTX - def_bool y if !S390 - config VFIO_PCI_IGD - bool "VFIO PCI extensions for Intel graphics (GVT-d)" + bool "Generic VFIO PCI extensions for Intel graphics (GVT-d)" depends on X86 default y help @@ -39,5 +42,5 @@ config VFIO_PCI_IGD and LPC bridge config space. To enable Intel IGD assignment through vfio-pci, say Y. - +endif endif diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 8aa517b4b671..349d68d242b4 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -1,7 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only -vfio-pci-y := vfio_pci.o vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o -vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o -vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o +vfio-pci-core-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o +vfio-pci-core-$(CONFIG_S390) += vfio_pci_zdev.o +obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o +vfio-pci-y := vfio_pci.o +vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 85fd638a5955..a5ce92beb655 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -25,7 +25,7 @@ #include #include -#include "vfio_pci_core.h" +#include #define DRIVER_AUTHOR "Alex Williamson " #define DRIVER_DESC "VFIO PCI - User Level meta-driver" @@ -153,6 +153,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = vfio_pci_core_register_device(vdev); if (ret) goto out_free; + dev_set_drvdata(&pdev->dev, vdev); return 0; out_free: @@ -246,14 +247,10 @@ static int __init vfio_pci_init(void) vfio_pci_core_set_params(nointxmask, is_disable_vga, disable_idle_d3); - ret = vfio_pci_core_init(); - if (ret) - return ret; - /* Register and scan for devices */ ret = pci_register_driver(&vfio_pci_driver); if (ret) - goto out; + return ret; vfio_pci_fill_ids(); @@ -261,17 +258,12 @@ static int __init vfio_pci_init(void) pr_warn("device denylist disabled.\n"); return 0; - -out: - vfio_pci_core_cleanup(); - return ret; } module_init(vfio_pci_init); static void __exit vfio_pci_cleanup(void) { pci_unregister_driver(&vfio_pci_driver); - vfio_pci_core_cleanup(); } module_exit(vfio_pci_cleanup); diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 1f034f768a27..6e58b4bf7a60 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -26,7 +26,7 @@ #include #include -#include "vfio_pci_core.h" +#include /* Fake capability ID for standard config space */ #define PCI_CAP_ID_BASIC 0 diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 65eafaafb2e0..675616e08897 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -8,6 +8,8 @@ * Author: Tom Lyon, pugs@cisco.com */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -25,7 +27,10 @@ #include #include -#include "vfio_pci_core.h" +#include + +#define DRIVER_AUTHOR "Alex Williamson " +#define DRIVER_DESC "core driver for VFIO based PCI devices" static bool nointxmask; static bool disable_vga; @@ -306,6 +311,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev) return 0; } +EXPORT_SYMBOL_GPL(vfio_pci_core_enable); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev) { @@ -403,6 +409,7 @@ out: if (!vfio_pci_dev_set_try_reset(vdev->vdev.dev_set) && !disable_idle_d3) vfio_pci_set_power_state(vdev, PCI_D3hot); } +EXPORT_SYMBOL_GPL(vfio_pci_core_disable); static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev) { @@ -459,6 +466,7 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) } mutex_unlock(&vdev->igate); } +EXPORT_SYMBOL_GPL(vfio_pci_core_close_device); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) { @@ -466,6 +474,7 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) vfio_spapr_pci_eeh_open(vdev->pdev); vfio_pci_vf_token_user_add(vdev, 1); } +EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable); static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type) { @@ -624,6 +633,7 @@ int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, return 0; } +EXPORT_SYMBOL_GPL(vfio_pci_register_dev_region); long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg) @@ -1168,6 +1178,7 @@ hot_reset_release: return -ENOTTY; } +EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl); static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) @@ -1211,6 +1222,7 @@ ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf, return vfio_pci_rw(vdev, buf, count, ppos, false); } +EXPORT_SYMBOL_GPL(vfio_pci_core_read); ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, size_t count, loff_t *ppos) @@ -1223,6 +1235,7 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true); } +EXPORT_SYMBOL_GPL(vfio_pci_core_write); /* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try) @@ -1501,6 +1514,7 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma return 0; } +EXPORT_SYMBOL_GPL(vfio_pci_core_mmap); void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count) { @@ -1523,6 +1537,7 @@ void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count) mutex_unlock(&vdev->igate); } +EXPORT_SYMBOL_GPL(vfio_pci_core_request); static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, bool vf_token, uuid_t *uuid) @@ -1667,6 +1682,7 @@ int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf) return 1; /* Match */ } +EXPORT_SYMBOL_GPL(vfio_pci_core_match); static int vfio_pci_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) @@ -1775,6 +1791,7 @@ void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, INIT_LIST_HEAD(&vdev->vma_list); init_rwsem(&vdev->memory_lock); } +EXPORT_SYMBOL_GPL(vfio_pci_core_init_device); void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev) { @@ -1785,6 +1802,7 @@ void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev) kfree(vdev->region); kfree(vdev->pm_save); } +EXPORT_SYMBOL_GPL(vfio_pci_core_uninit_device); int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) { @@ -1852,7 +1870,6 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) ret = vfio_register_group_dev(&vdev->vdev); if (ret) goto out_power; - dev_set_drvdata(&pdev->dev, vdev); return 0; out_power: @@ -1864,6 +1881,7 @@ out_group_put: vfio_iommu_group_put(group, &pdev->dev); return ret; } +EXPORT_SYMBOL_GPL(vfio_pci_core_register_device); void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev) { @@ -1881,6 +1899,7 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev) if (!disable_idle_d3) vfio_pci_set_power_state(vdev, PCI_D0); } +EXPORT_SYMBOL_GPL(vfio_pci_core_unregister_device); static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, pci_channel_state_t state) @@ -1924,10 +1943,12 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn) return ret < 0 ? ret : nr_virtfn; } +EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure); const struct pci_error_handlers vfio_pci_core_err_handlers = { .error_detected = vfio_pci_aer_err_detected, }; +EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers); static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev, struct vfio_pci_group_info *groups) @@ -2116,16 +2137,22 @@ void vfio_pci_core_set_params(bool is_nointxmask, bool is_disable_vga, disable_vga = is_disable_vga; disable_idle_d3 = is_disable_idle_d3; } +EXPORT_SYMBOL_GPL(vfio_pci_core_set_params); -/* This will become the __exit function of vfio_pci_core.ko */ -void vfio_pci_core_cleanup(void) +static void vfio_pci_core_cleanup(void) { vfio_pci_uninit_perm_bits(); } -/* This will become the __init function of vfio_pci_core.ko */ -int __init vfio_pci_core_init(void) +static int __init vfio_pci_core_init(void) { /* Allocate shared config space permission data used by all devices */ return vfio_pci_init_perm_bits(); } + +module_init(vfio_pci_core_init); +module_exit(vfio_pci_core_cleanup); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/pci/vfio_pci_core.h b/drivers/vfio/pci/vfio_pci_core.h deleted file mode 100644 index 7a2da1e14de3..000000000000 --- a/drivers/vfio/pci/vfio_pci_core.h +++ /dev/null @@ -1,241 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 Red Hat, Inc. All rights reserved. - * Author: Alex Williamson - * - * Derived from original vfio: - * Copyright 2010 Cisco Systems, Inc. All rights reserved. - * Author: Tom Lyon, pugs@cisco.com - */ - -#include -#include -#include -#include -#include -#include -#include - -#ifndef VFIO_PCI_CORE_H -#define VFIO_PCI_CORE_H - -#define VFIO_PCI_OFFSET_SHIFT 40 - -#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT) -#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) -#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) - -/* Special capability IDs predefined access */ -#define PCI_CAP_ID_INVALID 0xFF /* default raw access */ -#define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */ - -/* Cap maximum number of ioeventfds per device (arbitrary) */ -#define VFIO_PCI_IOEVENTFD_MAX 1000 - -struct vfio_pci_ioeventfd { - struct list_head next; - struct vfio_pci_core_device *vdev; - struct virqfd *virqfd; - void __iomem *addr; - uint64_t data; - loff_t pos; - int bar; - int count; - bool test_mem; -}; - -struct vfio_pci_irq_ctx { - struct eventfd_ctx *trigger; - struct virqfd *unmask; - struct virqfd *mask; - char *name; - bool masked; - struct irq_bypass_producer producer; -}; - -struct vfio_pci_core_device; -struct vfio_pci_region; - -struct vfio_pci_regops { - ssize_t (*rw)(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); - void (*release)(struct vfio_pci_core_device *vdev, - struct vfio_pci_region *region); - int (*mmap)(struct vfio_pci_core_device *vdev, - struct vfio_pci_region *region, - struct vm_area_struct *vma); - int (*add_capability)(struct vfio_pci_core_device *vdev, - struct vfio_pci_region *region, - struct vfio_info_cap *caps); -}; - -struct vfio_pci_region { - u32 type; - u32 subtype; - const struct vfio_pci_regops *ops; - void *data; - size_t size; - u32 flags; -}; - -struct vfio_pci_dummy_resource { - struct resource resource; - int index; - struct list_head res_next; -}; - -struct vfio_pci_vf_token { - struct mutex lock; - uuid_t uuid; - int users; -}; - -struct vfio_pci_mmap_vma { - struct vm_area_struct *vma; - struct list_head vma_next; -}; - -struct vfio_pci_core_device { - struct vfio_device vdev; - struct pci_dev *pdev; - void __iomem *barmap[PCI_STD_NUM_BARS]; - bool bar_mmap_supported[PCI_STD_NUM_BARS]; - u8 *pci_config_map; - u8 *vconfig; - struct perm_bits *msi_perm; - spinlock_t irqlock; - struct mutex igate; - struct vfio_pci_irq_ctx *ctx; - int num_ctx; - int irq_type; - int num_regions; - struct vfio_pci_region *region; - u8 msi_qmax; - u8 msix_bar; - u16 msix_size; - u32 msix_offset; - u32 rbar[7]; - bool pci_2_3; - bool virq_disabled; - bool reset_works; - bool extended_caps; - bool bardirty; - bool has_vga; - bool needs_reset; - bool nointx; - bool needs_pm_restore; - struct pci_saved_state *pci_saved_state; - struct pci_saved_state *pm_save; - int ioeventfds_nr; - struct eventfd_ctx *err_trigger; - struct eventfd_ctx *req_trigger; - struct list_head dummy_resources_list; - struct mutex ioeventfds_lock; - struct list_head ioeventfds_list; - struct vfio_pci_vf_token *vf_token; - struct notifier_block nb; - struct mutex vma_lock; - struct list_head vma_list; - struct rw_semaphore memory_lock; -}; - -#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) -#define is_msi(vdev) (vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX) -#define is_msix(vdev) (vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX) -#define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev))) -#define irq_is(vdev, type) (vdev->irq_type == type) - -extern void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev); -extern void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev); - -extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, - uint32_t flags, unsigned index, - unsigned start, unsigned count, void *data); - -extern ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); - -extern ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); - -extern ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); - -extern long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, - uint64_t data, int count, int fd); - -extern int vfio_pci_init_perm_bits(void); -extern void vfio_pci_uninit_perm_bits(void); - -extern int vfio_config_init(struct vfio_pci_core_device *vdev); -extern void vfio_config_free(struct vfio_pci_core_device *vdev); - -extern int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, - unsigned int type, unsigned int subtype, - const struct vfio_pci_regops *ops, - size_t size, u32 flags, void *data); - -extern int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, - pci_power_t state); - -extern bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev); -extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device - *vdev); -extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev); -extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, - u16 cmd); - -#ifdef CONFIG_VFIO_PCI_IGD -extern int vfio_pci_igd_init(struct vfio_pci_core_device *vdev); -#else -static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) -{ - return -ENODEV; -} -#endif - -#ifdef CONFIG_S390 -extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, - struct vfio_info_cap *caps); -#else -static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, - struct vfio_info_cap *caps) -{ - return -ENODEV; -} -#endif - -/* Will be exported for vfio pci drivers usage */ -void vfio_pci_core_cleanup(void); -int vfio_pci_core_init(void); -void vfio_pci_core_set_params(bool nointxmask, bool is_disable_vga, - bool is_disable_idle_d3); -void vfio_pci_core_close_device(struct vfio_device *core_vdev); -void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, - struct pci_dev *pdev, - const struct vfio_device_ops *vfio_pci_ops); -int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev); -void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev); -void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev); -int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn); -extern const struct pci_error_handlers vfio_pci_core_err_handlers; -long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, - unsigned long arg); -ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf, - size_t count, loff_t *ppos); -ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, - size_t count, loff_t *ppos); -int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma); -void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count); -int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); -int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); -void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); -void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); - -static inline bool vfio_pci_is_vga(struct pci_dev *pdev) -{ - return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; -} - -#endif /* VFIO_PCI_CORE_H */ diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index a324ca7e6b5a..7ca4109bba48 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -15,7 +15,7 @@ #include #include -#include "vfio_pci_core.h" +#include #define OPREGION_SIGNATURE "IntelGraphicsMem" #define OPREGION_SIZE (8 * 1024) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 945ddbdf4d11..6069a11fb51a 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -20,7 +20,7 @@ #include #include -#include "vfio_pci_core.h" +#include /* * INTx diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 8fff4689dd44..57d3b2cbbd8e 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -17,7 +17,7 @@ #include #include -#include "vfio_pci_core.h" +#include #ifdef __LITTLE_ENDIAN #define vfio_ioread64 ioread64 diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c index 2ffbdc11f089..fe4def9ffffb 100644 --- a/drivers/vfio/pci/vfio_pci_zdev.c +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -19,7 +19,7 @@ #include #include -#include "vfio_pci_core.h" +#include /* * Add the Base PCI Function information to the device info region. diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h new file mode 100644 index 000000000000..ef9a44b6cf5d --- /dev/null +++ b/include/linux/vfio_pci_core.h @@ -0,0 +1,239 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * Derived from original vfio: + * Copyright 2010 Cisco Systems, Inc. All rights reserved. + * Author: Tom Lyon, pugs@cisco.com + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifndef VFIO_PCI_CORE_H +#define VFIO_PCI_CORE_H + +#define VFIO_PCI_OFFSET_SHIFT 40 + +#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT) +#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) +#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) + +/* Special capability IDs predefined access */ +#define PCI_CAP_ID_INVALID 0xFF /* default raw access */ +#define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */ + +/* Cap maximum number of ioeventfds per device (arbitrary) */ +#define VFIO_PCI_IOEVENTFD_MAX 1000 + +struct vfio_pci_ioeventfd { + struct list_head next; + struct vfio_pci_core_device *vdev; + struct virqfd *virqfd; + void __iomem *addr; + uint64_t data; + loff_t pos; + int bar; + int count; + bool test_mem; +}; + +struct vfio_pci_irq_ctx { + struct eventfd_ctx *trigger; + struct virqfd *unmask; + struct virqfd *mask; + char *name; + bool masked; + struct irq_bypass_producer producer; +}; + +struct vfio_pci_core_device; +struct vfio_pci_region; + +struct vfio_pci_regops { + ssize_t (*rw)(struct vfio_pci_core_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); + void (*release)(struct vfio_pci_core_device *vdev, + struct vfio_pci_region *region); + int (*mmap)(struct vfio_pci_core_device *vdev, + struct vfio_pci_region *region, + struct vm_area_struct *vma); + int (*add_capability)(struct vfio_pci_core_device *vdev, + struct vfio_pci_region *region, + struct vfio_info_cap *caps); +}; + +struct vfio_pci_region { + u32 type; + u32 subtype; + const struct vfio_pci_regops *ops; + void *data; + size_t size; + u32 flags; +}; + +struct vfio_pci_dummy_resource { + struct resource resource; + int index; + struct list_head res_next; +}; + +struct vfio_pci_vf_token { + struct mutex lock; + uuid_t uuid; + int users; +}; + +struct vfio_pci_mmap_vma { + struct vm_area_struct *vma; + struct list_head vma_next; +}; + +struct vfio_pci_core_device { + struct vfio_device vdev; + struct pci_dev *pdev; + void __iomem *barmap[PCI_STD_NUM_BARS]; + bool bar_mmap_supported[PCI_STD_NUM_BARS]; + u8 *pci_config_map; + u8 *vconfig; + struct perm_bits *msi_perm; + spinlock_t irqlock; + struct mutex igate; + struct vfio_pci_irq_ctx *ctx; + int num_ctx; + int irq_type; + int num_regions; + struct vfio_pci_region *region; + u8 msi_qmax; + u8 msix_bar; + u16 msix_size; + u32 msix_offset; + u32 rbar[7]; + bool pci_2_3; + bool virq_disabled; + bool reset_works; + bool extended_caps; + bool bardirty; + bool has_vga; + bool needs_reset; + bool nointx; + bool needs_pm_restore; + struct pci_saved_state *pci_saved_state; + struct pci_saved_state *pm_save; + int ioeventfds_nr; + struct eventfd_ctx *err_trigger; + struct eventfd_ctx *req_trigger; + struct list_head dummy_resources_list; + struct mutex ioeventfds_lock; + struct list_head ioeventfds_list; + struct vfio_pci_vf_token *vf_token; + struct notifier_block nb; + struct mutex vma_lock; + struct list_head vma_list; + struct rw_semaphore memory_lock; +}; + +#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) +#define is_msi(vdev) (vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX) +#define is_msix(vdev) (vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX) +#define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev))) +#define irq_is(vdev, type) (vdev->irq_type == type) + +extern void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev); +extern void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev); + +extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, + uint32_t flags, unsigned index, + unsigned start, unsigned count, void *data); + +extern ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, + char __user *buf, size_t count, + loff_t *ppos, bool iswrite); + +extern ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); + +extern ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); + +extern long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, + uint64_t data, int count, int fd); + +extern int vfio_pci_init_perm_bits(void); +extern void vfio_pci_uninit_perm_bits(void); + +extern int vfio_config_init(struct vfio_pci_core_device *vdev); +extern void vfio_config_free(struct vfio_pci_core_device *vdev); + +extern int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, + unsigned int type, unsigned int subtype, + const struct vfio_pci_regops *ops, + size_t size, u32 flags, void *data); + +extern int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, + pci_power_t state); + +extern bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev); +extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device + *vdev); +extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev); +extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, + u16 cmd); + +#ifdef CONFIG_VFIO_PCI_IGD +extern int vfio_pci_igd_init(struct vfio_pci_core_device *vdev); +#else +static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) +{ + return -ENODEV; +} +#endif + +#ifdef CONFIG_S390 +extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, + struct vfio_info_cap *caps); +#else +static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, + struct vfio_info_cap *caps) +{ + return -ENODEV; +} +#endif + +/* Will be exported for vfio pci drivers usage */ +void vfio_pci_core_set_params(bool nointxmask, bool is_disable_vga, + bool is_disable_idle_d3); +void vfio_pci_core_close_device(struct vfio_device *core_vdev); +void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, + struct pci_dev *pdev, + const struct vfio_device_ops *vfio_pci_ops); +int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev); +void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev); +void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev); +int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn); +extern const struct pci_error_handlers vfio_pci_core_err_handlers; +long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, + unsigned long arg); +ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf, + size_t count, loff_t *ppos); +ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, + size_t count, loff_t *ppos); +int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma); +void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count); +int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); +int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); +void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); +void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); + +static inline bool vfio_pci_is_vga(struct pci_dev *pdev) +{ + return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; +} + +#endif /* VFIO_PCI_CORE_H */ -- cgit v1.2.3