From 55c91fedd03d7b9cf0c5199b2eb12b9b8e95281a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 29 Jun 2023 14:05:26 +0200 Subject: virtio-mmio: don't break lifecycle of vm_dev vm_dev has a separate lifecycle because it has a 'struct device' embedded. Thus, having a release callback for it is correct. Allocating the vm_dev struct with devres totally breaks this protection, though. Instead of waiting for the vm_dev release callback, the memory is freed when the platform_device is removed. Resulting in a use-after-free when finally the callback is to be called. To easily see the problem, compile the kernel with CONFIG_DEBUG_KOBJECT_RELEASE and unbind with sysfs. The fix is easy, don't use devres in this case. Found during my research about object lifetime problems. Fixes: 7eb781b1bbb7 ("virtio_mmio: add cleanup for virtio_mmio_probe") Signed-off-by: Wolfram Sang Message-Id: <20230629120526.7184-1-wsa+renesas@sang-engineering.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mmio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index a46a4a29e929..97760f611295 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -607,9 +607,8 @@ static void virtio_mmio_release_dev(struct device *_d) struct virtio_device *vdev = container_of(_d, struct virtio_device, dev); struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); - struct platform_device *pdev = vm_dev->pdev; - devm_kfree(&pdev->dev, vm_dev); + kfree(vm_dev); } /* Platform device */ @@ -620,7 +619,7 @@ static int virtio_mmio_probe(struct platform_device *pdev) unsigned long magic; int rc; - vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL); + vm_dev = kzalloc(sizeof(*vm_dev), GFP_KERNEL); if (!vm_dev) return -ENOMEM; -- cgit v1.2.3 From 9ad1a29cb0991e3145996cdce691525e8ac65db7 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 6 Jul 2023 16:17:18 -0700 Subject: pds_vdpa: protect Makefile from unconfigured debugfs debugfs.h protects itself from an undefined DEBUG_FS, so it is not necessary to check it in the driver code or the Makefile. The driver code had been updated for this, but the Makefile had missed the update. Link: https://lore.kernel.org/linux-next/fec68c3c-8249-7af4-5390-0495386a76f9@infradead.org/ Fixes: a16291b5bcbb ("pds_vdpa: Add new vDPA driver for AMD/Pensando DSC") Signed-off-by: Shannon Nelson Message-Id: <20230706231718.54198-1-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Acked-by: Jason Wang --- drivers/vdpa/pds/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vdpa/pds/Makefile b/drivers/vdpa/pds/Makefile index 2e22418e3ab3..c2d314d4614d 100644 --- a/drivers/vdpa/pds/Makefile +++ b/drivers/vdpa/pds/Makefile @@ -5,6 +5,5 @@ obj-$(CONFIG_PDS_VDPA) := pds_vdpa.o pds_vdpa-y := aux_drv.o \ cmds.o \ + debugfs.o \ vdpa_dev.o - -pds_vdpa-$(CONFIG_DEBUG_FS) += debugfs.o -- cgit v1.2.3 From 5ced58bfa132c8ba0f9c893eb621595a84cfee12 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 9 Jul 2023 15:28:58 -0500 Subject: vhost-scsi: Fix alignment handling with windows The linux block layer requires bios/requests to have lengths with a 512 byte alignment. Some drivers/layers like dm-crypt and the directi IO code will test for it and just fail. Other drivers like SCSI just assume the requirement is met and will end up in infinte retry loops. The problem for drivers like SCSI is that it uses functions like blk_rq_cur_sectors and blk_rq_sectors which divide the request's length by 512. If there's lefovers then it just gets dropped. But other code in the block/scsi layer may use blk_rq_bytes/blk_rq_cur_bytes and end up thinking there is still data left and try to retry the cmd. We can then end up getting stuck in retry loops where part of the block/scsi thinks there is data left, but other parts think we want to do IOs of zero length. Linux will always check for alignment, but windows will not. When vhost-scsi then translates the iovec it gets from a windows guest to a scatterlist, we can end up with sg items where the sg->length is not divisible by 512 due to the misaligned offset: sg[0].offset = 255; sg[0].length = 3841; sg... sg[N].offset = 0; sg[N].length = 255; When the lio backends then convert the SG to bios or other iovecs, we end up sending them with the same misaligned values and can hit the issues above. This just has us drop down to allocating a temp page and copying the data when we detect a misaligned buffer and the IO is large enough that it will get split into multiple bad IOs. Signed-off-by: Mike Christie Message-Id: <20230709202859.138387-2-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 186 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 161 insertions(+), 25 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index c83f7f043470..324e4b3846fa 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -75,6 +77,9 @@ struct vhost_scsi_cmd { u32 tvc_prot_sgl_count; /* Saved unpacked SCSI LUN for vhost_scsi_target_queue_cmd() */ u32 tvc_lun; + u32 copied_iov:1; + const void *saved_iter_addr; + struct iov_iter saved_iter; /* Pointer to the SGL formatted memory from virtio-scsi */ struct scatterlist *tvc_sgl; struct scatterlist *tvc_prot_sgl; @@ -328,8 +333,13 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd) int i; if (tv_cmd->tvc_sgl_count) { - for (i = 0; i < tv_cmd->tvc_sgl_count; i++) - put_page(sg_page(&tv_cmd->tvc_sgl[i])); + for (i = 0; i < tv_cmd->tvc_sgl_count; i++) { + if (tv_cmd->copied_iov) + __free_page(sg_page(&tv_cmd->tvc_sgl[i])); + else + put_page(sg_page(&tv_cmd->tvc_sgl[i])); + } + kfree(tv_cmd->saved_iter_addr); } if (tv_cmd->tvc_prot_sgl_count) { for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++) @@ -504,6 +514,28 @@ static void vhost_scsi_evt_work(struct vhost_work *work) mutex_unlock(&vq->mutex); } +static int vhost_scsi_copy_sgl_to_iov(struct vhost_scsi_cmd *cmd) +{ + struct iov_iter *iter = &cmd->saved_iter; + struct scatterlist *sg = cmd->tvc_sgl; + struct page *page; + size_t len; + int i; + + for (i = 0; i < cmd->tvc_sgl_count; i++) { + page = sg_page(&sg[i]); + len = sg[i].length; + + if (copy_page_to_iter(page, 0, len, iter) != len) { + pr_err("Could not copy data while handling misaligned cmd. Error %zu\n", + len); + return -1; + } + } + + return 0; +} + /* Fill in status and signal that we are done processing this command * * This is scheduled in the vhost work queue so we are called with the owner @@ -527,15 +559,20 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__, cmd, se_cmd->residual_count, se_cmd->scsi_status); - memset(&v_rsp, 0, sizeof(v_rsp)); - v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, se_cmd->residual_count); - /* TODO is status_qualifier field needed? */ - v_rsp.status = se_cmd->scsi_status; - v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq, - se_cmd->scsi_sense_length); - memcpy(v_rsp.sense, cmd->tvc_sense_buf, - se_cmd->scsi_sense_length); + + if (cmd->saved_iter_addr && vhost_scsi_copy_sgl_to_iov(cmd)) { + v_rsp.response = VIRTIO_SCSI_S_BAD_TARGET; + } else { + v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, + se_cmd->residual_count); + /* TODO is status_qualifier field needed? */ + v_rsp.status = se_cmd->scsi_status; + v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq, + se_cmd->scsi_sense_length); + memcpy(v_rsp.sense, cmd->tvc_sense_buf, + se_cmd->scsi_sense_length); + } iov_iter_init(&iov_iter, ITER_DEST, cmd->tvc_resp_iov, cmd->tvc_in_iovs, sizeof(v_rsp)); @@ -613,12 +650,12 @@ static int vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, struct scatterlist *sgl, - bool write) + bool is_prot) { struct page **pages = cmd->tvc_upages; struct scatterlist *sg = sgl; - ssize_t bytes; - size_t offset; + ssize_t bytes, mapped_bytes; + size_t offset, mapped_offset; unsigned int npages = 0; bytes = iov_iter_get_pages2(iter, pages, LONG_MAX, @@ -627,13 +664,53 @@ vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd, if (bytes <= 0) return bytes < 0 ? bytes : -EFAULT; + mapped_bytes = bytes; + mapped_offset = offset; + while (bytes) { unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes); + /* + * The block layer requires bios/requests to be a multiple of + * 512 bytes, but Windows can send us vecs that are misaligned. + * This can result in bios and later requests with misaligned + * sizes if we have to break up a cmd/scatterlist into multiple + * bios. + * + * We currently only break up a command into multiple bios if + * we hit the vec/seg limit, so check if our sgl_count is + * greater than the max and if a vec in the cmd has a + * misaligned offset/size. + */ + if (!is_prot && + (offset & (SECTOR_SIZE - 1) || n & (SECTOR_SIZE - 1)) && + cmd->tvc_sgl_count > BIO_MAX_VECS) { + WARN_ONCE(true, + "vhost-scsi detected misaligned IO. Performance may be degraded."); + goto revert_iter_get_pages; + } + sg_set_page(sg++, pages[npages++], n, offset); bytes -= n; offset = 0; } + return npages; + +revert_iter_get_pages: + iov_iter_revert(iter, mapped_bytes); + + npages = 0; + while (mapped_bytes) { + unsigned int n = min_t(unsigned int, PAGE_SIZE - mapped_offset, + mapped_bytes); + + put_page(pages[npages++]); + + mapped_bytes -= n; + mapped_offset = 0; + } + + return -EINVAL; } static int @@ -657,25 +734,80 @@ vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls) } static int -vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write, - struct iov_iter *iter, - struct scatterlist *sg, int sg_count) +vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, + struct scatterlist *sg, int sg_count) +{ + size_t len = iov_iter_count(iter); + unsigned int nbytes = 0; + struct page *page; + int i; + + if (cmd->tvc_data_direction == DMA_FROM_DEVICE) { + cmd->saved_iter_addr = dup_iter(&cmd->saved_iter, iter, + GFP_KERNEL); + if (!cmd->saved_iter_addr) + return -ENOMEM; + } + + for (i = 0; i < sg_count; i++) { + page = alloc_page(GFP_KERNEL); + if (!page) { + i--; + goto err; + } + + nbytes = min_t(unsigned int, PAGE_SIZE, len); + sg_set_page(&sg[i], page, nbytes, 0); + + if (cmd->tvc_data_direction == DMA_TO_DEVICE && + copy_page_from_iter(page, 0, nbytes, iter) != nbytes) + goto err; + + len -= nbytes; + } + + cmd->copied_iov = 1; + return 0; + +err: + pr_err("Could not read %u bytes while handling misaligned cmd\n", + nbytes); + + for (; i >= 0; i--) + __free_page(sg_page(&sg[i])); + kfree(cmd->saved_iter_addr); + return -ENOMEM; +} + +static int +vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, + struct scatterlist *sg, int sg_count, bool is_prot) { struct scatterlist *p = sg; + size_t revert_bytes; int ret; while (iov_iter_count(iter)) { - ret = vhost_scsi_map_to_sgl(cmd, iter, sg, write); + ret = vhost_scsi_map_to_sgl(cmd, iter, sg, is_prot); if (ret < 0) { + revert_bytes = 0; + while (p < sg) { - struct page *page = sg_page(p++); - if (page) + struct page *page = sg_page(p); + + if (page) { put_page(page); + revert_bytes += p->length; + } + p++; } + + iov_iter_revert(iter, revert_bytes); return ret; } sg += ret; } + return 0; } @@ -685,7 +817,6 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, size_t data_bytes, struct iov_iter *data_iter) { int sgl_count, ret; - bool write = (cmd->tvc_data_direction == DMA_FROM_DEVICE); if (prot_bytes) { sgl_count = vhost_scsi_calc_sgls(prot_iter, prot_bytes, @@ -698,9 +829,8 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__, cmd->tvc_prot_sgl, cmd->tvc_prot_sgl_count); - ret = vhost_scsi_iov_to_sgl(cmd, write, prot_iter, - cmd->tvc_prot_sgl, - cmd->tvc_prot_sgl_count); + ret = vhost_scsi_iov_to_sgl(cmd, prot_iter, cmd->tvc_prot_sgl, + cmd->tvc_prot_sgl_count, true); if (ret < 0) { cmd->tvc_prot_sgl_count = 0; return ret; @@ -716,8 +846,14 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, pr_debug("%s data_sg %p data_sgl_count %u\n", __func__, cmd->tvc_sgl, cmd->tvc_sgl_count); - ret = vhost_scsi_iov_to_sgl(cmd, write, data_iter, - cmd->tvc_sgl, cmd->tvc_sgl_count); + ret = vhost_scsi_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, + cmd->tvc_sgl_count, false); + if (ret == -EINVAL) { + sg_init_table(cmd->tvc_sgl, cmd->tvc_sgl_count); + ret = vhost_scsi_copy_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, + cmd->tvc_sgl_count); + } + if (ret < 0) { cmd->tvc_sgl_count = 0; return ret; -- cgit v1.2.3 From c5ace19efb0ac884a9a417e2a1499ce9849bdaa5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 9 Jul 2023 15:28:59 -0500 Subject: vhost-scsi: Rename vhost_scsi_iov_to_sgl Rename vhost_scsi_iov_to_sgl to vhost_scsi_map_iov_to_sgl so it matches matches the naming style used for vhost_scsi_copy_iov_to_sgl. Signed-off-by: Mike Christie Message-Id: <20230709202859.138387-3-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Stefan Hajnoczi --- drivers/vhost/scsi.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 324e4b3846fa..abef0619c790 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -780,8 +780,8 @@ err: } static int -vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, - struct scatterlist *sg, int sg_count, bool is_prot) +vhost_scsi_map_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter, + struct scatterlist *sg, int sg_count, bool is_prot) { struct scatterlist *p = sg; size_t revert_bytes; @@ -829,8 +829,9 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__, cmd->tvc_prot_sgl, cmd->tvc_prot_sgl_count); - ret = vhost_scsi_iov_to_sgl(cmd, prot_iter, cmd->tvc_prot_sgl, - cmd->tvc_prot_sgl_count, true); + ret = vhost_scsi_map_iov_to_sgl(cmd, prot_iter, + cmd->tvc_prot_sgl, + cmd->tvc_prot_sgl_count, true); if (ret < 0) { cmd->tvc_prot_sgl_count = 0; return ret; @@ -846,8 +847,8 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd, pr_debug("%s data_sg %p data_sgl_count %u\n", __func__, cmd->tvc_sgl, cmd->tvc_sgl_count); - ret = vhost_scsi_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, - cmd->tvc_sgl_count, false); + ret = vhost_scsi_map_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, + cmd->tvc_sgl_count, false); if (ret == -EINVAL) { sg_init_table(cmd->tvc_sgl, cmd->tvc_sgl_count); ret = vhost_scsi_copy_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl, -- cgit v1.2.3 From 8d4bdf11f096e5b343ee0f9aaa8c262dc16d2e1e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 15 Jul 2023 09:20:27 -0500 Subject: MAINTAINERS: add vhost-scsi entry and myself as a co-maintainer I've been doing a lot of the development on vhost-scsi the last couple of years, so per Michael T's suggestion this adds me as co-maintainer. Signed-off-by: Mike Christie Message-Id: <20230715142027.5572-1-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Stefano Garzarella Acked-by: Stefan Hajnoczi --- MAINTAINERS | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0f966f05fb0d..63a31c3bf7d4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22476,7 +22476,6 @@ L: virtualization@lists.linux-foundation.org S: Maintained F: drivers/block/virtio_blk.c F: drivers/scsi/virtio_scsi.c -F: drivers/vhost/scsi.c F: include/uapi/linux/virtio_blk.h F: include/uapi/linux/virtio_scsi.h @@ -22575,6 +22574,16 @@ F: include/linux/vhost_iotlb.h F: include/uapi/linux/vhost.h F: kernel/vhost_task.c +VIRTIO HOST (VHOST-SCSI) +M: "Michael S. Tsirkin" +M: Jason Wang +M: Mike Christie +R: Paolo Bonzini +R: Stefan Hajnoczi +L: virtualization@lists.linux-foundation.org +S: Maintained +F: drivers/vhost/scsi.c + VIRTIO I2C DRIVER M: Conghui Chen M: Viresh Kumar -- cgit v1.2.3 From 13f3efaca024e16ccfab0e8b2cf29d66489d8d54 Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Wed, 19 Jul 2023 11:45:50 -0400 Subject: virtio-pci: Fix legacy device flag setting error in probe The 'is_legacy' flag is used to differentiate between legacy vs modern device. Currently, it is based on the value of vp_dev->ldev.ioaddr. However, due to the shared memory of the union between struct virtio_pci_legacy_device and struct virtio_pci_modern_device, when virtio_pci_modern_probe modifies the content of struct virtio_pci_modern_device, it affects the content of struct virtio_pci_legacy_device, and ldev.ioaddr is no longer zero, causing the 'is_legacy' flag to be set as true. To resolve issue, when legacy device is probed, mark 'is_legacy' as true, when modern device is probed, keep 'is_legacy' as false. Fixes: 4f0fc22534e3 ("virtio_pci: Optimize virtio_pci_device structure size") Signed-off-by: Feng Liu Reviewed-by: Parav Pandit Reviewed-by: Jiri Pirko Message-Id: <20230719154550.79536-1-feliu@nvidia.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo Acked-by: Jason Wang --- drivers/virtio/virtio_pci_common.c | 2 -- drivers/virtio/virtio_pci_legacy.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index a6c86f916dbd..c2524a7207cf 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -557,8 +557,6 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, pci_set_master(pci_dev); - vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false; - rc = register_virtio_device(&vp_dev->vdev); reg_dev = vp_dev; if (rc) diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 2257f1b3d8ae..d9cbb02b35a1 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -223,6 +223,7 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) vp_dev->config_vector = vp_config_vector; vp_dev->setup_vq = setup_vq; vp_dev->del_vq = del_vq; + vp_dev->is_legacy = true; return 0; } -- cgit v1.2.3 From 79c8651587504ba263d2fd67fd4406240fb21f69 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 27 Jul 2023 20:57:48 +0300 Subject: vdpa: Add features attr to vdpa_nl_policy for nlattr length check The vdpa_nl_policy structure is used to validate the nlattr when parsing the incoming nlmsg. It will ensure the attribute being described produces a valid nlattr pointer in info->attrs before entering into each handler in vdpa_nl_ops. That is to say, the missing part in vdpa_nl_policy may lead to illegal nlattr after parsing, which could lead to OOB read just like CVE-2023-3773. This patch adds the missing nla_policy for vdpa features attr to avoid such bugs. Fixes: 90fea5a800c3 ("vdpa: device feature provisioning") Signed-off-by: Lin Ma Cc: stable@vger.kernel.org Message-Id: <20230727175757.73988-3-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 965e32529eb8..3ad355a2208a 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -1249,6 +1249,7 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR, /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */ [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68), + [VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 }, }; static const struct genl_ops vdpa_nl_ops[] = { -- cgit v1.2.3 From b3003e1b54e057f5f3124e437b80c3bef26ed3fe Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 27 Jul 2023 20:57:50 +0300 Subject: vdpa: Add queue index attr to vdpa_nl_policy for nlattr length check The vdpa_nl_policy structure is used to validate the nlattr when parsing the incoming nlmsg. It will ensure the attribute being described produces a valid nlattr pointer in info->attrs before entering into each handler in vdpa_nl_ops. That is to say, the missing part in vdpa_nl_policy may lead to illegal nlattr after parsing, which could lead to OOB read just like CVE-2023-3773. This patch adds the missing nla_policy for vdpa queue index attr to avoid such bugs. Fixes: 13b00b135665 ("vdpa: Add support for querying vendor statistics") Signed-off-by: Lin Ma Cc: stable@vger.kernelorg Message-Id: <20230727175757.73988-5-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 3ad355a2208a..75f1df2b9d2a 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -1249,6 +1249,7 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR, /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */ [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68), + [VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 }, [VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 }, }; -- cgit v1.2.3 From 5d6ba607d6cb5c58a4ddf33381e18c83dbb4098f Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 27 Jul 2023 20:57:52 +0300 Subject: vdpa: Add max vqp attr to vdpa_nl_policy for nlattr length check The vdpa_nl_policy structure is used to validate the nlattr when parsing the incoming nlmsg. It will ensure the attribute being described produces a valid nlattr pointer in info->attrs before entering into each handler in vdpa_nl_ops. That is to say, the missing part in vdpa_nl_policy may lead to illegal nlattr after parsing, which could lead to OOB read just like CVE-2023-3773. This patch adds the missing nla_policy for vdpa max vqp attr to avoid such bugs. Fixes: ad69dd0bf26b ("vdpa: Introduce query of device config layout") Signed-off-by: Lin Ma Cc: stable@vger.kernel.org Message-Id: <20230727175757.73988-7-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 75f1df2b9d2a..f2f654fd84e5 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -1247,6 +1247,7 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING }, [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING }, [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR, + [VDPA_ATTR_DEV_NET_CFG_MAX_VQP] = { .type = NLA_U16 }, /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */ [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68), [VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 }, -- cgit v1.2.3 From f46c1e1620c6bbc9aad5693082efd1b80822e97c Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 27 Jul 2023 20:57:54 +0300 Subject: vdpa: Enable strict validation for netlinks ops The previous patches added the missing nla policies that were required for validation to work. Now strict validation on netlink ops can be enabled. This patch does it. Signed-off-by: Dragos Tatulea Cc: stable@vger.kernel.org Message-Id: <20230727175757.73988-9-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index f2f654fd84e5..a7612e0783b3 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -1257,37 +1257,31 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { static const struct genl_ops vdpa_nl_ops[] = { { .cmd = VDPA_CMD_MGMTDEV_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = vdpa_nl_cmd_mgmtdev_get_doit, .dumpit = vdpa_nl_cmd_mgmtdev_get_dumpit, }, { .cmd = VDPA_CMD_DEV_NEW, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = vdpa_nl_cmd_dev_add_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = VDPA_CMD_DEV_DEL, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = vdpa_nl_cmd_dev_del_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = VDPA_CMD_DEV_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = vdpa_nl_cmd_dev_get_doit, .dumpit = vdpa_nl_cmd_dev_get_dumpit, }, { .cmd = VDPA_CMD_DEV_CONFIG_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = vdpa_nl_cmd_dev_config_get_doit, .dumpit = vdpa_nl_cmd_dev_config_get_dumpit, }, { .cmd = VDPA_CMD_DEV_VSTATS_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = vdpa_nl_cmd_dev_stats_get_doit, .flags = GENL_ADMIN_PERM, }, -- cgit v1.2.3 From 7ca26efb09a1543fddb29308ea3b63b66cb5d3ee Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Wed, 5 Jul 2023 13:45:05 +0200 Subject: vduse: Use proper spinlock for IRQ injection The IRQ injection work used spin_lock_irq() to protect the scheduling of the softirq, but spin_lock_bh() should be used. With spin_lock_irq(), we noticed delay of more than 6 seconds between the time a NAPI polling work is scheduled and the time it is executed. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Cc: xieyongji@bytedance.com Suggested-by: Jason Wang Signed-off-by: Maxime Coquelin Message-Id: <20230705114505.63274-1-maxime.coquelin@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Xie Yongji --- drivers/vdpa/vdpa_user/vduse_dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index dc38ed21319d..df7869537ef1 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -935,10 +935,10 @@ static void vduse_dev_irq_inject(struct work_struct *work) { struct vduse_dev *dev = container_of(work, struct vduse_dev, inject); - spin_lock_irq(&dev->irq_lock); + spin_lock_bh(&dev->irq_lock); if (dev->config_cb.callback) dev->config_cb.callback(dev->config_cb.private); - spin_unlock_irq(&dev->irq_lock); + spin_unlock_bh(&dev->irq_lock); } static void vduse_vq_irq_inject(struct work_struct *work) @@ -946,10 +946,10 @@ static void vduse_vq_irq_inject(struct work_struct *work) struct vduse_virtqueue *vq = container_of(work, struct vduse_virtqueue, inject); - spin_lock_irq(&vq->irq_lock); + spin_lock_bh(&vq->irq_lock); if (vq->ready && vq->cb.callback) vq->cb.callback(vq->cb.private); - spin_unlock_irq(&vq->irq_lock); + spin_unlock_bh(&vq->irq_lock); } static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq) -- cgit v1.2.3 From df9557046440b0a62250fee3169a8f6a139f55a6 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 26 Jul 2023 22:10:07 +0300 Subject: virtio-vdpa: Fix cpumask memory leak in virtio_vdpa_find_vqs() Free the cpumask allocated by create_affinity_masks() before returning from the function. Fixes: 3dad56823b53 ("virtio-vdpa: Support interrupt affinity spreading mechanism") Signed-off-by: Gal Pressman Reviewed-by: Dragos Tatulea Message-Id: <20230726191036.14324-1-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Xie Yongji --- drivers/virtio/virtio_vdpa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 989e2d7184ce..961161da5900 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -393,11 +393,13 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, cb.callback = virtio_vdpa_config_cb; cb.private = vd_dev; ops->set_config_cb(vdpa, &cb); + kfree(masks); return 0; err_setup_vq: virtio_vdpa_del_vqs(vdev); + kfree(masks); return err; } -- cgit v1.2.3 From 3fe024193340b225d1fd410d78c495434a9d68e0 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 27 Jul 2023 20:23:46 +0300 Subject: vdpa/mlx5: Correct default number of queues when MQ is on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The standard specifies that the initial number of queues is the default, which is 1 (1 tx, 1 rx). Signed-off-by: Dragos Tatulea Reviewed-by: Eugenio Pérez Message-Id: <20230727172354.68243-2-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Tested-by: Lei Yang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 9138ef2fb2c8..6b6eb69a8a90 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2517,7 +2517,15 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) else ndev->rqt_size = 1; - ndev->cur_num_vqs = 2 * ndev->rqt_size; + /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section + * 5.1.6.5.5 "Device operation in multiqueue mode": + * + * Multiqueue is disabled by default. + * The driver enables multiqueue by sending a command using class + * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue + * operation, as follows: ... + */ + ndev->cur_num_vqs = 2; update_cvq_info(mvdev); return err; -- cgit v1.2.3 From 9ee811009ad8f87982b69e61d07447d12233ad01 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 2 Aug 2023 20:12:18 +0300 Subject: vdpa/mlx5: Fix mr->initialized semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mr->initialized flag is shared between the control vq and data vq part of the mr init/uninit. But if the control vq and data vq get placed in different ASIDs, it can happen that initializing the control vq will prevent the data vq mr from being initialized. This patch consolidates the control and data vq init parts into their own init functions. The mr->initialized will now be used for the data vq only. The control vq currently doesn't need a flag. The uninitializing part is also taken care of: mlx5_vdpa_destroy_mr got split into data and control vq functions which are now also ASID aware. Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data") Signed-off-by: Dragos Tatulea Reviewed-by: Eugenio Pérez Reviewed-by: Gal Pressman Message-Id: <20230802171231.11001-3-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 + drivers/vdpa/mlx5/core/mr.c | 97 +++++++++++++++++++++++++++----------- 2 files changed, 71 insertions(+), 27 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 25fc4120b618..a0420be5059f 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -31,6 +31,7 @@ struct mlx5_vdpa_mr { struct list_head head; unsigned long num_directs; unsigned long num_klms; + /* state of dvq mr */ bool initialized; /* serialize mkey creation and destruction */ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 03e543229791..4ae14a248a4b 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr } } -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +{ + if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) + return; + + prune_iotlb(mvdev); +} + +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) { struct mlx5_vdpa_mr *mr = &mvdev->mr; - mutex_lock(&mr->mkey_mtx); + if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid) + return; + if (!mr->initialized) - goto out; + return; - prune_iotlb(mvdev); if (mr->user_mr) destroy_user_mr(mvdev, mr); else destroy_dma_mr(mvdev, mr); mr->initialized = false; -out: +} + +static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + + mutex_lock(&mr->mkey_mtx); + + _mlx5_vdpa_destroy_dvq_mr(mvdev, asid); + _mlx5_vdpa_destroy_cvq_mr(mvdev, asid); + mutex_unlock(&mr->mkey_mtx); } -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, - struct vhost_iotlb *iotlb, unsigned int asid) +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) +{ + mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]); + mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]); +} + +static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, + unsigned int asid) +{ + if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) + return 0; + + return dup_iotlb(mvdev, iotlb); +} + +static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, + unsigned int asid) { struct mlx5_vdpa_mr *mr = &mvdev->mr; int err; - if (mr->initialized) + if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid) return 0; - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { - if (iotlb) - err = create_user_mr(mvdev, iotlb); - else - err = create_dma_mr(mvdev, mr); + if (mr->initialized) + return 0; - if (err) - return err; - } + if (iotlb) + err = create_user_mr(mvdev, iotlb); + else + err = create_dma_mr(mvdev, mr); - if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) { - err = dup_iotlb(mvdev, iotlb); - if (err) - goto out_err; - } + if (err) + return err; mr->initialized = true; + + return 0; +} + +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, unsigned int asid) +{ + int err; + + err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid); + if (err) + return err; + + err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid); + if (err) + goto out_err; + return 0; out_err: - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { - if (iotlb) - destroy_user_mr(mvdev, mr); - else - destroy_dma_mr(mvdev, mr); - } + _mlx5_vdpa_destroy_dvq_mr(mvdev, asid); return err; } -- cgit v1.2.3 From ad03a0f44cdb97b46e5c84ed353dac9b8ae2c276 Mon Sep 17 00:00:00 2001 From: Eugenio Pérez Date: Wed, 2 Aug 2023 20:12:20 +0300 Subject: vdpa/mlx5: Delete control vq iotlb in destroy_mr only when necessary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mlx5_vdpa_destroy_mr can be called from .set_map with data ASID after the control virtqueue ASID iotlb has been populated. The control vq iotlb must not be cleared, since it will not be populated again. So call the ASID aware destroy function which makes sure that the right vq resource is destroyed. Fixes: 8fcd20c30704 ("vdpa/mlx5: Support different address spaces for control and data") Signed-off-by: Eugenio Pérez Reviewed-by: Gal Pressman Message-Id: <20230802171231.11001-5-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 + drivers/vdpa/mlx5/core/mr.c | 2 +- drivers/vdpa/mlx5/net/mlx5_vnet.c | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index a0420be5059f..b53420e874ac 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -122,6 +122,7 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, unsigned int asid); void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev); +void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid); #define mlx5_vdpa_warn(__dev, format, ...) \ dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 4ae14a248a4b..5a1971fcd87b 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -515,7 +515,7 @@ static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int mr->initialized = false; } -static void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid) { struct mlx5_vdpa_mr *mr = &mvdev->mr; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 6b6eb69a8a90..dbbc82eec7de 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2644,7 +2644,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, goto err_mr; teardown_driver(ndev); - mlx5_vdpa_destroy_mr(mvdev); + mlx5_vdpa_destroy_mr_asid(mvdev, asid); err = mlx5_vdpa_create_mr(mvdev, iotlb, asid); if (err) goto err_mr; @@ -2660,7 +2660,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, return 0; err_setup: - mlx5_vdpa_destroy_mr(mvdev); + mlx5_vdpa_destroy_mr_asid(mvdev, asid); err_mr: return err; } -- cgit v1.2.3 From 810b0cc1c28a9b8d055dd8f7d85975e3cf9f4430 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 3 Aug 2023 18:26:33 +0300 Subject: vdpa/mlx5: Fix crash on shutdown for when no ndev exists The ndev was accessed on shutdown without a check if it actually exists. This triggered the crash pasted below. Instead of doing the ndev check, delete the shutdown handler altogether. The irqs will be released at the parent VF level (mlx5_core). BUG: kernel NULL pointer dereference, address: 0000000000000300 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP CPU: 0 PID: 1 Comm: systemd-shutdow Not tainted 6.5.0-rc2_for_upstream_min_debug_2023_07_17_15_05 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:mlx5v_shutdown+0xe/0x50 [mlx5_vdpa] RSP: 0018:ffff8881003bfdc0 EFLAGS: 00010286 RAX: ffff888103befba0 RBX: ffff888109d28008 RCX: 0000000000000017 RDX: 0000000000000001 RSI: 0000000000000212 RDI: ffff888109d28000 RBP: 0000000000000000 R08: 0000000d3a3a3882 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: ffff888109d28000 R13: ffff888109d28080 R14: 00000000fee1dead R15: 0000000000000000 FS: 00007f4969e0be40(0000) GS:ffff88852c800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000300 CR3: 00000001051cd006 CR4: 0000000000370eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __die+0x20/0x60 ? page_fault_oops+0x14c/0x3c0 ? exc_page_fault+0x75/0x140 ? asm_exc_page_fault+0x22/0x30 ? mlx5v_shutdown+0xe/0x50 [mlx5_vdpa] device_shutdown+0x13e/0x1e0 kernel_restart+0x36/0x90 __do_sys_reboot+0x141/0x210 ? vfs_writev+0xcd/0x140 ? handle_mm_fault+0x161/0x260 ? do_writev+0x6b/0x110 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f496990fb56 RSP: 002b:00007fffc7bdde88 EFLAGS: 00000206 ORIG_RAX: 00000000000000a9 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f496990fb56 RDX: 0000000001234567 RSI: 0000000028121969 RDI: fffffffffee1dead RBP: 00007fffc7bde1d0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 R13: 00007fffc7bddf10 R14: 0000000000000000 R15: 00007fffc7bde2b8 CR2: 0000000000000300 ---[ end trace 0000000000000000 ]--- Fixes: bc9a2b3e686e ("vdpa/mlx5: Support interrupt bypassing") Signed-off-by: Dragos Tatulea Message-Id: <20230803152648.199297-1-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index dbbc82eec7de..37be945a0230 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -3556,17 +3556,6 @@ static void mlx5v_remove(struct auxiliary_device *adev) kfree(mgtdev); } -static void mlx5v_shutdown(struct auxiliary_device *auxdev) -{ - struct mlx5_vdpa_mgmtdev *mgtdev; - struct mlx5_vdpa_net *ndev; - - mgtdev = auxiliary_get_drvdata(auxdev); - ndev = mgtdev->ndev; - - free_irqs(ndev); -} - static const struct auxiliary_device_id mlx5v_id_table[] = { { .name = MLX5_ADEV_NAME ".vnet", }, {}, @@ -3578,7 +3567,6 @@ static struct auxiliary_driver mlx5v_driver = { .name = "vnet", .probe = mlx5v_probe, .remove = mlx5v_remove, - .shutdown = mlx5v_shutdown, .id_table = mlx5v_id_table, }; -- cgit v1.2.3 From 2c507ce90e02cd78d00fd4b0fe26c8641873c13f Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Thu, 10 Aug 2023 19:04:05 +0800 Subject: virtio-net: Zero max_tx_vq field for VIRTIO_NET_CTRL_MQ_HASH_CONFIG case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kernel uses `struct virtio_net_ctrl_rss` to save command-specific-data for both the VIRTIO_NET_CTRL_MQ_HASH_CONFIG and VIRTIO_NET_CTRL_MQ_RSS_CONFIG commands. According to the VirtIO standard, "Field reserved MUST contain zeroes. It is defined to make the structure to match the layout of virtio_net_rss_config structure, defined in 5.1.6.5.7.". Yet for the VIRTIO_NET_CTRL_MQ_HASH_CONFIG command case, the `max_tx_vq` field in struct virtio_net_ctrl_rss, which corresponds to the `reserved` field in struct virtio_net_hash_config, is not zeroed, thereby violating the VirtIO standard. This patch solves this problem by zeroing this field in virtnet_init_default_rss(). Cc: Andrew Melnychenko Cc: stable@vger.kernel.org Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.") Signed-off-by: Hawkins Jiawei Acked-by: Jason Wang Acked-by: Eugenio Pérez Acked-by: Michael S. Tsirkin Message-Id: <20230810110405.25558-1-yin31149@gmail.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo Acked-by: Jason Wang --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1270c8d23463..8db38634ae82 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2761,7 +2761,7 @@ static void virtnet_init_default_rss(struct virtnet_info *vi) vi->ctrl->rss.indirection_table[i] = indir_val; } - vi->ctrl->rss.max_tx_vq = vi->curr_queue_pairs; + vi->ctrl->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0; vi->ctrl->rss.hash_key_length = vi->rss_key_size; netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size); -- cgit v1.2.3 From 0cd2c13b1c15dbbdf1e2ae5b7160537f97df06b5 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 10 Jul 2023 21:24:33 -0700 Subject: pds_vdpa: reset to vdpa specified mac When the vdpa device is reset, also reinitialize it with the mac address that was assigned when the device was added. Fixes: 151cc834f3dd ("pds_vdpa: add support for vdpa and vdpamgmt interfaces") Signed-off-by: Allen Hubbe Signed-off-by: Shannon Nelson Message-Id: <20230711042437.69381-2-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/pds/vdpa_dev.c | 16 ++++++++-------- drivers/vdpa/pds/vdpa_dev.h | 1 + 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 5071a4d58f8d..e2e99bb0be2b 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -409,6 +409,8 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) pdsv->vqs[i].avail_idx = 0; pdsv->vqs[i].used_idx = 0; } + + pds_vdpa_cmd_set_mac(pdsv, pdsv->mac); } if (status & ~old_status & VIRTIO_CONFIG_S_FEATURES_OK) { @@ -532,7 +534,6 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, struct device *dma_dev; struct pci_dev *pdev; struct device *dev; - u8 mac[ETH_ALEN]; int err; int i; @@ -617,19 +618,18 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, * or set a random mac if default is 00:..:00 */ if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) { - ether_addr_copy(mac, add_config->net.mac); - pds_vdpa_cmd_set_mac(pdsv, mac); + ether_addr_copy(pdsv->mac, add_config->net.mac); } else { struct virtio_net_config __iomem *vc; vc = pdsv->vdpa_aux->vd_mdev.device; - memcpy_fromio(mac, vc->mac, sizeof(mac)); - if (is_zero_ether_addr(mac)) { - eth_random_addr(mac); - dev_info(dev, "setting random mac %pM\n", mac); - pds_vdpa_cmd_set_mac(pdsv, mac); + memcpy_fromio(pdsv->mac, vc->mac, sizeof(pdsv->mac)); + if (is_zero_ether_addr(pdsv->mac)) { + eth_random_addr(pdsv->mac); + dev_info(dev, "setting random mac %pM\n", pdsv->mac); } } + pds_vdpa_cmd_set_mac(pdsv, pdsv->mac); for (i = 0; i < pdsv->num_vqs; i++) { pdsv->vqs[i].qid = i; diff --git a/drivers/vdpa/pds/vdpa_dev.h b/drivers/vdpa/pds/vdpa_dev.h index a1bc37de9537..cf02df287fc4 100644 --- a/drivers/vdpa/pds/vdpa_dev.h +++ b/drivers/vdpa/pds/vdpa_dev.h @@ -39,6 +39,7 @@ struct pds_vdpa_device { u64 req_features; /* features requested by vdpa */ u8 vdpa_index; /* rsvd for future subdevice use */ u8 num_vqs; /* num vqs in use */ + u8 mac[ETH_ALEN]; /* mac selected when the device was added */ struct vdpa_callback config_cb; struct notifier_block nb; }; -- cgit v1.2.3 From abdf31bd91120035172dc58e2e87064a72e9e087 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 10 Jul 2023 21:24:34 -0700 Subject: pds_vdpa: always allow offering VIRTIO_NET_F_MAC Our driver sets a mac if the HW is 00:..:00 so we need to be sure to advertise VIRTIO_NET_F_MAC even if the HW doesn't. We also need to be sure that virtio_net sees the VIRTIO_NET_F_MAC and doesn't rewrite the mac address that a user may have set with the vdpa utility. After reading the hw_feature bits, add the VIRTIO_NET_F_MAC to the driver's supported_features and use that for reporting what is available. If the HW is not advertising it, be sure to strip the VIRTIO_NET_F_MAC before finishing the feature negotiation. If the user specifies a device_features bitpattern in the vdpa utility without the VIRTIO_NET_F_MAC set, then don't set the mac. Fixes: 151cc834f3dd ("pds_vdpa: add support for vdpa and vdpamgmt interfaces") Signed-off-by: Shannon Nelson Message-Id: <20230711042437.69381-3-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/pds/debugfs.c | 2 -- drivers/vdpa/pds/vdpa_dev.c | 30 +++++++++++++++++++++--------- drivers/vdpa/pds/vdpa_dev.h | 4 ++-- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c index 21a0dc0cb607..754ccb7a6666 100644 --- a/drivers/vdpa/pds/debugfs.c +++ b/drivers/vdpa/pds/debugfs.c @@ -224,8 +224,6 @@ static int config_show(struct seq_file *seq, void *v) seq_printf(seq, "dev_status: %#x\n", status); print_status_bits(seq, status); - seq_printf(seq, "req_features: %#llx\n", pdsv->req_features); - print_feature_bits_all(seq, pdsv->req_features); driver_features = vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev); seq_printf(seq, "driver_features: %#llx\n", driver_features); print_feature_bits_all(seq, driver_features); diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index e2e99bb0be2b..5b566e0eef0a 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -318,6 +318,7 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur struct device *dev = &pdsv->vdpa_dev.dev; u64 driver_features; u64 nego_features; + u64 hw_features; u64 missing; if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) { @@ -325,21 +326,26 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur return -EOPNOTSUPP; } - pdsv->req_features = features; - /* Check for valid feature bits */ - nego_features = features & le64_to_cpu(pdsv->vdpa_aux->ident.hw_features); - missing = pdsv->req_features & ~nego_features; + nego_features = features & pdsv->supported_features; + missing = features & ~nego_features; if (missing) { dev_err(dev, "Can't support all requested features in %#llx, missing %#llx features\n", - pdsv->req_features, missing); + features, missing); return -EOPNOTSUPP; } + pdsv->negotiated_features = nego_features; + driver_features = pds_vdpa_get_driver_features(vdpa_dev); dev_dbg(dev, "%s: %#llx => %#llx\n", __func__, driver_features, nego_features); + /* if we're faking the F_MAC, strip it before writing to device */ + hw_features = le64_to_cpu(pdsv->vdpa_aux->ident.hw_features); + if (!(hw_features & BIT_ULL(VIRTIO_NET_F_MAC))) + nego_features &= ~BIT_ULL(VIRTIO_NET_F_MAC); + if (driver_features == nego_features) return 0; @@ -352,7 +358,7 @@ static u64 pds_vdpa_get_driver_features(struct vdpa_device *vdpa_dev) { struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); - return vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev); + return pdsv->negotiated_features; } static void pds_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, @@ -564,7 +570,7 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { u64 unsupp_features = - add_config->device_features & ~mgmt->supported_features; + add_config->device_features & ~pdsv->supported_features; if (unsupp_features) { dev_err(dev, "Unsupported features: %#llx\n", unsupp_features); @@ -615,7 +621,8 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, } /* Set a mac, either from the user config if provided - * or set a random mac if default is 00:..:00 + * or use the device's mac if not 00:..:00 + * or set a random mac */ if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) { ether_addr_copy(pdsv->mac, add_config->net.mac); @@ -624,7 +631,8 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, vc = pdsv->vdpa_aux->vd_mdev.device; memcpy_fromio(pdsv->mac, vc->mac, sizeof(pdsv->mac)); - if (is_zero_ether_addr(pdsv->mac)) { + if (is_zero_ether_addr(pdsv->mac) && + (pdsv->supported_features & BIT_ULL(VIRTIO_NET_F_MAC))) { eth_random_addr(pdsv->mac); dev_info(dev, "setting random mac %pM\n", pdsv->mac); } @@ -752,6 +760,10 @@ int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux) mgmt->id_table = pds_vdpa_id_table; mgmt->device = dev; mgmt->supported_features = le64_to_cpu(vdpa_aux->ident.hw_features); + + /* advertise F_MAC even if the device doesn't */ + mgmt->supported_features |= BIT_ULL(VIRTIO_NET_F_MAC); + mgmt->config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES); diff --git a/drivers/vdpa/pds/vdpa_dev.h b/drivers/vdpa/pds/vdpa_dev.h index cf02df287fc4..d984ba24a7da 100644 --- a/drivers/vdpa/pds/vdpa_dev.h +++ b/drivers/vdpa/pds/vdpa_dev.h @@ -35,8 +35,8 @@ struct pds_vdpa_device { struct pds_vdpa_aux *vdpa_aux; struct pds_vdpa_vq_info vqs[PDS_VDPA_MAX_QUEUES]; - u64 supported_features; /* specified device features */ - u64 req_features; /* features requested by vdpa */ + u64 supported_features; /* supported device features */ + u64 negotiated_features; /* negotiated features */ u8 vdpa_index; /* rsvd for future subdevice use */ u8 num_vqs; /* num vqs in use */ u8 mac[ETH_ALEN]; /* mac selected when the device was added */ -- cgit v1.2.3 From ed88863040daad18d3f9b12f7c9c1c3da3731e1f Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 10 Jul 2023 21:24:35 -0700 Subject: pds_vdpa: clean and reset vqs entries Make sure that we initialize the vqs[] entries the same way both for initial setup and after a vq reset. Fixes: 151cc834f3dd ("pds_vdpa: add support for vdpa and vdpamgmt interfaces") Signed-off-by: Shannon Nelson Message-Id: <20230711042437.69381-4-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/pds/vdpa_dev.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 5b566e0eef0a..04a362648b02 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -428,6 +428,17 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) } } +static void pds_vdpa_init_vqs_entry(struct pds_vdpa_device *pdsv, int qid, + void __iomem *notify) +{ + memset(&pdsv->vqs[qid], 0, sizeof(pdsv->vqs[0])); + pdsv->vqs[qid].qid = qid; + pdsv->vqs[qid].pdsv = pdsv; + pdsv->vqs[qid].ready = false; + pdsv->vqs[qid].irq = VIRTIO_MSI_NO_VECTOR; + pdsv->vqs[qid].notify = notify; +} + static int pds_vdpa_reset(struct vdpa_device *vdpa_dev) { struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); @@ -450,8 +461,7 @@ static int pds_vdpa_reset(struct vdpa_device *vdpa_dev) dev_err(dev, "%s: reset_vq failed qid %d: %pe\n", __func__, i, ERR_PTR(err)); pds_vdpa_release_irq(pdsv, i); - memset(&pdsv->vqs[i], 0, sizeof(pdsv->vqs[0])); - pdsv->vqs[i].ready = false; + pds_vdpa_init_vqs_entry(pdsv, i, pdsv->vqs[i].notify); } } @@ -640,11 +650,11 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, pds_vdpa_cmd_set_mac(pdsv, pdsv->mac); for (i = 0; i < pdsv->num_vqs; i++) { - pdsv->vqs[i].qid = i; - pdsv->vqs[i].pdsv = pdsv; - pdsv->vqs[i].irq = VIRTIO_MSI_NO_VECTOR; - pdsv->vqs[i].notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev, - i, &pdsv->vqs[i].notify_pa); + void __iomem *notify; + + notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev, + i, &pdsv->vqs[i].notify_pa); + pds_vdpa_init_vqs_entry(pdsv, i, notify); } pdsv->vdpa_dev.mdev = &vdpa_aux->vdpa_mdev; -- cgit v1.2.3 From c0a6c5cbf1a9e49357e942ed393da08a55808a49 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 10 Jul 2023 21:24:36 -0700 Subject: pds_vdpa: alloc irq vectors on DRIVER_OK We were allocating irq vectors at the time the aux dev was probed, but that is before the PCI VF is assigned to a separate iommu domain by vhost_vdpa. Because vhost_vdpa later changes the iommu domain the interrupts do not work. Instead, we can allocate the irq vectors later when we see DRIVER_OK and know that the reassignment of the PCI VF to an iommu domain has already happened. Fixes: 151cc834f3dd ("pds_vdpa: add support for vdpa and vdpamgmt interfaces") Signed-off-by: Allen Hubbe Signed-off-by: Shannon Nelson Acked-by: Jason Wang Message-Id: <20230711042437.69381-5-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/pds/vdpa_dev.c | 110 ++++++++++++++++++++++++++++++++------------ 1 file changed, 81 insertions(+), 29 deletions(-) diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 04a362648b02..52b2449182ad 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -126,11 +126,9 @@ static void pds_vdpa_release_irq(struct pds_vdpa_device *pdsv, int qid) static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool ready) { struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); - struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev; struct device *dev = &pdsv->vdpa_dev.dev; u64 driver_features; u16 invert_idx = 0; - int irq; int err; dev_dbg(dev, "%s: qid %d ready %d => %d\n", @@ -143,19 +141,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re invert_idx = PDS_VDPA_PACKED_INVERT_IDX; if (ready) { - irq = pci_irq_vector(pdev, qid); - snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name), - "vdpa-%s-%d", dev_name(dev), qid); - - err = request_irq(irq, pds_vdpa_isr, 0, - pdsv->vqs[qid].irq_name, &pdsv->vqs[qid]); - if (err) { - dev_err(dev, "%s: no irq for qid %d: %pe\n", - __func__, qid, ERR_PTR(err)); - return; - } - pdsv->vqs[qid].irq = irq; - /* Pass vq setup info to DSC using adminq to gather up and * send all info at once so FW can do its full set up in * one easy operation @@ -164,7 +149,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re if (err) { dev_err(dev, "Failed to init vq %d: %pe\n", qid, ERR_PTR(err)); - pds_vdpa_release_irq(pdsv, qid); ready = false; } } else { @@ -172,7 +156,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re if (err) dev_err(dev, "%s: reset_vq failed qid %d: %pe\n", __func__, qid, ERR_PTR(err)); - pds_vdpa_release_irq(pdsv, qid); } pdsv->vqs[qid].ready = ready; @@ -395,6 +378,72 @@ static u8 pds_vdpa_get_status(struct vdpa_device *vdpa_dev) return vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev); } +static int pds_vdpa_request_irqs(struct pds_vdpa_device *pdsv) +{ + struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev; + struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux; + struct device *dev = &pdsv->vdpa_dev.dev; + int max_vq, nintrs, qid, err; + + max_vq = vdpa_aux->vdpa_mdev.max_supported_vqs; + + nintrs = pci_alloc_irq_vectors(pdev, max_vq, max_vq, PCI_IRQ_MSIX); + if (nintrs < 0) { + dev_err(dev, "Couldn't get %d msix vectors: %pe\n", + max_vq, ERR_PTR(nintrs)); + return nintrs; + } + + for (qid = 0; qid < pdsv->num_vqs; ++qid) { + int irq = pci_irq_vector(pdev, qid); + + snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name), + "vdpa-%s-%d", dev_name(dev), qid); + + err = request_irq(irq, pds_vdpa_isr, 0, + pdsv->vqs[qid].irq_name, + &pdsv->vqs[qid]); + if (err) { + dev_err(dev, "%s: no irq for qid %d: %pe\n", + __func__, qid, ERR_PTR(err)); + goto err_release; + } + + pdsv->vqs[qid].irq = irq; + } + + vdpa_aux->nintrs = nintrs; + + return 0; + +err_release: + while (qid--) + pds_vdpa_release_irq(pdsv, qid); + + pci_free_irq_vectors(pdev); + + vdpa_aux->nintrs = 0; + + return err; +} + +static void pds_vdpa_release_irqs(struct pds_vdpa_device *pdsv) +{ + struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev; + struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux; + int qid; + + if (!vdpa_aux->nintrs) + return; + + for (qid = 0; qid < pdsv->num_vqs; qid++) + pds_vdpa_release_irq(pdsv, qid); + + pci_free_irq_vectors(pdev); + + vdpa_aux->nintrs = 0; +} + static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) { struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev); @@ -405,6 +454,11 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) old_status = pds_vdpa_get_status(vdpa_dev); dev_dbg(dev, "%s: old %#x new %#x\n", __func__, old_status, status); + if (status & ~old_status & VIRTIO_CONFIG_S_DRIVER_OK) { + if (pds_vdpa_request_irqs(pdsv)) + status = old_status | VIRTIO_CONFIG_S_FAILED; + } + pds_vdpa_cmd_set_status(pdsv, status); /* Note: still working with FW on the need for this reset cmd */ @@ -426,6 +480,9 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) i, &pdsv->vqs[i].notify_pa); } } + + if (old_status & ~status & VIRTIO_CONFIG_S_DRIVER_OK) + pds_vdpa_release_irqs(pdsv); } static void pds_vdpa_init_vqs_entry(struct pds_vdpa_device *pdsv, int qid, @@ -460,13 +517,17 @@ static int pds_vdpa_reset(struct vdpa_device *vdpa_dev) if (err) dev_err(dev, "%s: reset_vq failed qid %d: %pe\n", __func__, i, ERR_PTR(err)); - pds_vdpa_release_irq(pdsv, i); - pds_vdpa_init_vqs_entry(pdsv, i, pdsv->vqs[i].notify); } } pds_vdpa_set_status(vdpa_dev, 0); + if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + /* Reset the vq info */ + for (i = 0; i < pdsv->num_vqs && !err; i++) + pds_vdpa_init_vqs_entry(pdsv, i, pdsv->vqs[i].notify); + } + return 0; } @@ -764,7 +825,7 @@ int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux) max_vqs = min_t(u16, dev_intrs, max_vqs); mgmt->max_supported_vqs = min_t(u16, PDS_VDPA_MAX_QUEUES, max_vqs); - vdpa_aux->nintrs = mgmt->max_supported_vqs; + vdpa_aux->nintrs = 0; mgmt->ops = &pds_vdpa_mgmt_dev_ops; mgmt->id_table = pds_vdpa_id_table; @@ -778,14 +839,5 @@ int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux) mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES); - err = pci_alloc_irq_vectors(pdev, vdpa_aux->nintrs, vdpa_aux->nintrs, - PCI_IRQ_MSIX); - if (err < 0) { - dev_err(dev, "Couldn't get %d msix vectors: %pe\n", - vdpa_aux->nintrs, ERR_PTR(err)); - return err; - } - vdpa_aux->nintrs = err; - return 0; } -- cgit v1.2.3 From 8efc365b20dc9a5b0c8fd0e8a195690bf21cd8be Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 10 Jul 2023 21:24:37 -0700 Subject: pds_vdpa: fix up debugfs feature bit printing Make clearer in debugfs output the difference between the hw feature bits, the features supported through the driver, and the features that have been negotiated. Signed-off-by: Shannon Nelson Message-Id: <20230711042437.69381-6-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/pds/debugfs.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c index 754ccb7a6666..9b04aad6ec35 100644 --- a/drivers/vdpa/pds/debugfs.c +++ b/drivers/vdpa/pds/debugfs.c @@ -176,6 +176,7 @@ static int identity_show(struct seq_file *seq, void *v) { struct pds_vdpa_aux *vdpa_aux = seq->private; struct vdpa_mgmt_dev *mgmt; + u64 hw_features; seq_printf(seq, "aux_dev: %s\n", dev_name(&vdpa_aux->padev->aux_dev.dev)); @@ -183,8 +184,9 @@ static int identity_show(struct seq_file *seq, void *v) mgmt = &vdpa_aux->vdpa_mdev; seq_printf(seq, "max_vqs: %d\n", mgmt->max_supported_vqs); seq_printf(seq, "config_attr_mask: %#llx\n", mgmt->config_attr_mask); - seq_printf(seq, "supported_features: %#llx\n", mgmt->supported_features); - print_feature_bits_all(seq, mgmt->supported_features); + hw_features = le64_to_cpu(vdpa_aux->ident.hw_features); + seq_printf(seq, "hw_features: %#llx\n", hw_features); + print_feature_bits_all(seq, hw_features); return 0; } @@ -200,7 +202,6 @@ static int config_show(struct seq_file *seq, void *v) { struct pds_vdpa_device *pdsv = seq->private; struct virtio_net_config vc; - u64 driver_features; u8 status; memcpy_fromio(&vc, pdsv->vdpa_aux->vd_mdev.device, @@ -223,10 +224,8 @@ static int config_show(struct seq_file *seq, void *v) status = vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev); seq_printf(seq, "dev_status: %#x\n", status); print_status_bits(seq, status); - - driver_features = vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev); - seq_printf(seq, "driver_features: %#llx\n", driver_features); - print_feature_bits_all(seq, driver_features); + seq_printf(seq, "negotiated_features: %#llx\n", pdsv->negotiated_features); + print_feature_bits_all(seq, pdsv->negotiated_features); seq_printf(seq, "vdpa_index: %d\n", pdsv->vdpa_index); seq_printf(seq, "num_vqs: %d\n", pdsv->num_vqs); -- cgit v1.2.3 From f504e15b94eb4e5b47f8715da59c0207f68dffe1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 13 Jul 2023 16:55:48 +0200 Subject: virtio-mem: remove unsafe unplug in Big Block Mode (BBM) When "unsafe unplug" is enabled, we don't fake-offline all memory ahead of actual memory offlining using alloc_contig_range(). Instead, we rely on offline_pages() to also perform actual page migration, which might fail or take a very long time. In that case, it's possible to easily run into endless loops that cannot be aborted anymore (as offlining is triggered by a workqueue then): For example, a single (accidentally) permanently unmovable page in ZONE_MOVABLE results in an endless loop. For ZONE_NORMAL, races between isolating the pageblock (and checking for unmovable pages) and concurrent page allocation are possible and similarly result in endless loops. The idea of the unsafe unplug mode was to make it possible to more reliably unplug large memory blocks. However, (a) we really should be tackling that differently, by extending the alloc_contig_range()-based mechanism; and (b) this mode is not the default and as far as I know, it's unused either way. So let's simply get rid of it. Signed-off-by: David Hildenbrand Message-Id: <20230713145551.2824980-2-david@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 51 ++++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 31 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 835f6cc2fb66..ed15d2a4bd96 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -38,11 +38,6 @@ module_param(bbm_block_size, ulong, 0444); MODULE_PARM_DESC(bbm_block_size, "Big Block size in bytes. Default is 0 (auto-detection)."); -static bool bbm_safe_unplug = true; -module_param(bbm_safe_unplug, bool, 0444); -MODULE_PARM_DESC(bbm_safe_unplug, - "Use a safe unplug mechanism in BBM, avoiding long/endless loops"); - /* * virtio-mem currently supports the following modes of operation: * @@ -2111,38 +2106,32 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, VIRTIO_MEM_BBM_BB_ADDED)) return -EINVAL; - if (bbm_safe_unplug) { - /* - * Start by fake-offlining all memory. Once we marked the device - * block as fake-offline, all newly onlined memory will - * automatically be kept fake-offline. Protect from concurrent - * onlining/offlining until we have a consistent state. - */ - mutex_lock(&vm->hotplug_mutex); - virtio_mem_bbm_set_bb_state(vm, bb_id, - VIRTIO_MEM_BBM_BB_FAKE_OFFLINE); + /* + * Start by fake-offlining all memory. Once we marked the device + * block as fake-offline, all newly onlined memory will + * automatically be kept fake-offline. Protect from concurrent + * onlining/offlining until we have a consistent state. + */ + mutex_lock(&vm->hotplug_mutex); + virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_FAKE_OFFLINE); - for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { - page = pfn_to_online_page(pfn); - if (!page) - continue; + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + page = pfn_to_online_page(pfn); + if (!page) + continue; - rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION); - if (rc) { - end_pfn = pfn; - goto rollback_safe_unplug; - } + rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION); + if (rc) { + end_pfn = pfn; + goto rollback; } - mutex_unlock(&vm->hotplug_mutex); } + mutex_unlock(&vm->hotplug_mutex); rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id); if (rc) { - if (bbm_safe_unplug) { - mutex_lock(&vm->hotplug_mutex); - goto rollback_safe_unplug; - } - return rc; + mutex_lock(&vm->hotplug_mutex); + goto rollback; } rc = virtio_mem_bbm_unplug_bb(vm, bb_id); @@ -2154,7 +2143,7 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, VIRTIO_MEM_BBM_BB_UNUSED); return rc; -rollback_safe_unplug: +rollback: for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { page = pfn_to_online_page(pfn); if (!page) -- cgit v1.2.3 From ddf409851461f515cc32974714b73efe2e012bde Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 13 Jul 2023 16:55:49 +0200 Subject: virtio-mem: convert most offline_and_remove_memory() errors to -EBUSY Just like we do with alloc_contig_range(), let's convert all unknown errors to -EBUSY, but WARN so we can look into the issue. For example, offline_pages() could fail with -EINTR, which would be unexpected in our case. Signed-off-by: David Hildenbrand Message-Id: <20230713145551.2824980-3-david@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index ed15d2a4bd96..1a76ba2bc118 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -741,11 +741,15 @@ static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm, * immediately instead of waiting. */ virtio_mem_retry(vm); - } else { - dev_dbg(&vm->vdev->dev, - "offlining and removing memory failed: %d\n", rc); + return 0; } - return rc; + dev_dbg(&vm->vdev->dev, "offlining and removing memory failed: %d\n", rc); + /* + * We don't really expect this to fail, because we fake-offlined all + * memory already. But it could fail in corner cases. + */ + WARN_ON_ONCE(rc != -ENOMEM && rc != -EBUSY); + return rc == -ENOMEM ? -ENOMEM : -EBUSY; } /* -- cgit v1.2.3 From a31648fd4f96fbe0a4d0aeb16b57a2405c6943c0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 13 Jul 2023 16:55:50 +0200 Subject: virtio-mem: keep retrying on offline_and_remove_memory() errors in Sub Block Mode (SBM) In case offline_and_remove_memory() fails in SBM, we leave a completely unplugged Linux memory block stick around until we try plugging memory again. We won't try removing that memory block again. offline_and_remove_memory() may, for example, fail if we're racing with another alloc_contig_range() user, if allocating temporary memory fails, or if some memory notifier rejected the offlining request. Let's handle that case better, by simple retrying to offline and remove such memory. Tested using CONFIG_MEMORY_NOTIFIER_ERROR_INJECT. Signed-off-by: David Hildenbrand Message-Id: <20230713145551.2824980-4-david@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 92 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 19 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 1a76ba2bc118..a5cf92e3e5af 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -168,6 +168,13 @@ struct virtio_mem { /* The number of subblocks per Linux memory block. */ uint32_t sbs_per_mb; + /* + * Some of the Linux memory blocks tracked as "partially + * plugged" are completely unplugged and can be offlined + * and removed -- which previously failed. + */ + bool have_unplugged_mb; + /* Summary of all memory block states. */ unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; @@ -765,6 +772,34 @@ static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm, return virtio_mem_offline_and_remove_memory(vm, addr, size); } +/* + * Try (offlining and) removing memory from Linux in case all subblocks are + * unplugged. Can be called on online and offline memory blocks. + * + * May modify the state of memory blocks in virtio-mem. + */ +static int virtio_mem_sbm_try_remove_unplugged_mb(struct virtio_mem *vm, + unsigned long mb_id) +{ + int rc; + + /* + * Once all subblocks of a memory block were unplugged, offline and + * remove it. + */ + if (!virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) + return 0; + + /* offline_and_remove_memory() works for online and offline memory. */ + mutex_unlock(&vm->hotplug_mutex); + rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id); + mutex_lock(&vm->hotplug_mutex); + if (!rc) + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); + return rc; +} + /* * See virtio_mem_offline_and_remove_memory(): Try to offline and remove a * all Linux memory blocks covered by the big block. @@ -1988,20 +2023,10 @@ static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm, } unplugged: - /* - * Once all subblocks of a memory block were unplugged, offline and - * remove it. This will usually not fail, as no memory is in use - * anymore - however some other notifiers might NACK the request. - */ - if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { - mutex_unlock(&vm->hotplug_mutex); - rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id); - mutex_lock(&vm->hotplug_mutex); - if (!rc) - virtio_mem_sbm_set_mb_state(vm, mb_id, - VIRTIO_MEM_SBM_MB_UNUSED); - } - + rc = virtio_mem_sbm_try_remove_unplugged_mb(vm, mb_id); + if (rc) + vm->sbm.have_unplugged_mb = 1; + /* Ignore errors, this is not critical. We'll retry later. */ return 0; } @@ -2253,12 +2278,13 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) /* * Try to unplug all blocks that couldn't be unplugged before, for example, - * because the hypervisor was busy. + * because the hypervisor was busy. Further, offline and remove any memory + * blocks where we previously failed. */ -static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) +static int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm) { unsigned long id; - int rc; + int rc = 0; if (!vm->in_sbm) { virtio_mem_bbm_for_each_bb(vm, id, @@ -2280,6 +2306,27 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) VIRTIO_MEM_SBM_MB_UNUSED); } + if (!vm->sbm.have_unplugged_mb) + return 0; + + /* + * Let's retry (offlining and) removing completely unplugged Linux + * memory blocks. + */ + vm->sbm.have_unplugged_mb = false; + + mutex_lock(&vm->hotplug_mutex); + virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL) + rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id); + virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL) + rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id); + virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) + rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id); + mutex_unlock(&vm->hotplug_mutex); + + if (rc) + vm->sbm.have_unplugged_mb = true; + /* Ignore errors, this is not critical. We'll retry later. */ return 0; } @@ -2361,9 +2408,9 @@ retry: virtio_mem_refresh_config(vm); } - /* Unplug any leftovers from previous runs */ + /* Cleanup any leftovers from previous runs */ if (!rc) - rc = virtio_mem_unplug_pending_mb(vm); + rc = virtio_mem_cleanup_pending_mb(vm); if (!rc && vm->requested_size != vm->plugged_size) { if (vm->requested_size > vm->plugged_size) { @@ -2375,6 +2422,13 @@ retry: } } + /* + * Keep retrying to offline and remove completely unplugged Linux + * memory blocks. + */ + if (!rc && vm->in_sbm && vm->sbm.have_unplugged_mb) + rc = -EBUSY; + switch (rc) { case 0: vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS; -- cgit v1.2.3 From f55484fd7be923b740e8e1fc304070ba53675cb4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 13 Jul 2023 16:55:51 +0200 Subject: virtio-mem: check if the config changed before fake offlining memory If we repeatedly fail to fake offline memory to unplug it, we won't be sending any unplug requests to the device. However, we only check if the config changed when sending such (un)plug requests. We could end up trying for a long time to unplug memory, even though the config changed already and we're not supposed to unplug memory anymore. For example, the hypervisor might detect a low-memory situation while unplugging memory and decide to replug some memory. Continuing trying to unplug memory in that case can be problematic. So let's check on a more regular basis. Signed-off-by: David Hildenbrand Message-Id: <20230713145551.2824980-5-david@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index a5cf92e3e5af..fa5226c198cc 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1189,7 +1189,8 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) * Try to allocate a range, marking pages fake-offline, effectively * fake-offlining them. */ -static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) +static int virtio_mem_fake_offline(struct virtio_mem *vm, unsigned long pfn, + unsigned long nr_pages) { const bool is_movable = is_zone_movable_page(pfn_to_page(pfn)); int rc, retry_count; @@ -1202,6 +1203,14 @@ static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) * some guarantees. */ for (retry_count = 0; retry_count < 5; retry_count++) { + /* + * If the config changed, stop immediately and go back to the + * main loop: avoid trying to keep unplugging if the device + * might have decided to not remove any more memory. + */ + if (atomic_read(&vm->config_changed)) + return -EAGAIN; + rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, GFP_KERNEL); if (rc == -ENOMEM) @@ -1951,7 +1960,7 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm, start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->sbm.sb_size); - rc = virtio_mem_fake_offline(start_pfn, nr_pages); + rc = virtio_mem_fake_offline(vm, start_pfn, nr_pages); if (rc) return rc; @@ -2149,7 +2158,7 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, if (!page) continue; - rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION); + rc = virtio_mem_fake_offline(vm, pfn, PAGES_PER_SECTION); if (rc) { end_pfn = pfn; goto rollback; -- cgit v1.2.3