summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2025-11-14 05:10:09 -0700
committerJens Axboe <axboe@kernel.dk>2025-11-14 05:10:09 -0700
commitfa0d2dc69e945ddbb1d023dfb4f7ba4103523213 (patch)
tree4512b28a5e01e2d581381a7f75fce083be3e906b /drivers
parent8e1bf774ab18157cb8041628f2661aa12e425914 (diff)
parent37f0c7a8df7ad719a68fa1c2dbf066cfebc391a7 (diff)
Merge branch 'p2pdma-mmio-6.19.v5' into for-6.19/block
Merge MMIO P2P DMA series from Leon: "This patch series improves block layer and NVMe driver support for MMIO memory regions, particularly for peer-to-peer (P2P) DMA transfers that go through the host bridge. The series addresses a critical gap where P2P transfers through the host bridge (PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) were not properly marked as MMIO memory, leading to potential issues with: - Inappropriate CPU cache synchronization operations on MMIO regions - Incorrect DMA mapping/unmapping that doesn't respect MMIO semantics - Missing IOMMU configuration for MMIO memory handling This work is extracted from the larger DMA physical API improvement series [1] and focuses specifically on block layer and NVMe requirements for MMIO memory support. [1] https://lore.kernel.org/all/cover.1757423202.git.leonro@nvidia.com/" Link: https://lore.kernel.org/linux-block/20251114-block-with-mmio-v5-0-69d00f73d766@nvidia.com/ Signed-off-by: Jens Axboe <axboe@kernel.dk> * p2pdma-mmio-6.19.v5: block-dma: properly take MMIO path nvme-pci: migrate to dma_map_phys instead of map_page
Diffstat (limited to 'drivers')
-rw-r--r--drivers/nvme/host/pci.c90
1 files changed, 74 insertions, 16 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3c1727df1e36..9085bed107fd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -260,8 +260,20 @@ enum nvme_iod_flags {
/* single segment dma mapping */
IOD_SINGLE_SEGMENT = 1U << 2,
+ /* Data payload contains p2p memory */
+ IOD_DATA_P2P = 1U << 3,
+
+ /* Metadata contains p2p memory */
+ IOD_META_P2P = 1U << 4,
+
+ /* Data payload contains MMIO memory */
+ IOD_DATA_MMIO = 1U << 5,
+
+ /* Metadata contains MMIO memory */
+ IOD_META_MMIO = 1U << 6,
+
/* Metadata using non-coalesced MPTR */
- IOD_SINGLE_META_SEGMENT = 1U << 5,
+ IOD_SINGLE_META_SEGMENT = 1U << 7,
};
struct nvme_dma_vec {
@@ -698,20 +710,20 @@ static void nvme_free_descriptors(struct request *req)
}
}
-static void nvme_free_prps(struct request *req)
+static void nvme_free_prps(struct request *req, unsigned int attrs)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
unsigned int i;
for (i = 0; i < iod->nr_dma_vecs; i++)
- dma_unmap_page(nvmeq->dev->dev, iod->dma_vecs[i].addr,
- iod->dma_vecs[i].len, rq_dma_dir(req));
+ dma_unmap_phys(nvmeq->dev->dev, iod->dma_vecs[i].addr,
+ iod->dma_vecs[i].len, rq_dma_dir(req), attrs);
mempool_free(iod->dma_vecs, nvmeq->dev->dmavec_mempool);
}
static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge,
- struct nvme_sgl_desc *sg_list)
+ struct nvme_sgl_desc *sg_list, unsigned int attrs)
{
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
enum dma_data_direction dir = rq_dma_dir(req);
@@ -720,22 +732,25 @@ static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge,
unsigned int i;
if (sge->type == (NVME_SGL_FMT_DATA_DESC << 4)) {
- dma_unmap_page(dma_dev, le64_to_cpu(sge->addr), len, dir);
+ dma_unmap_phys(dma_dev, le64_to_cpu(sge->addr), len, dir,
+ attrs);
return;
}
for (i = 0; i < len / sizeof(*sg_list); i++)
- dma_unmap_page(dma_dev, le64_to_cpu(sg_list[i].addr),
- le32_to_cpu(sg_list[i].length), dir);
+ dma_unmap_phys(dma_dev, le64_to_cpu(sg_list[i].addr),
+ le32_to_cpu(sg_list[i].length), dir, attrs);
}
static void nvme_unmap_metadata(struct request *req)
{
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+ enum pci_p2pdma_map_type map = PCI_P2PDMA_MAP_NONE;
enum dma_data_direction dir = rq_dma_dir(req);
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct device *dma_dev = nvmeq->dev->dev;
struct nvme_sgl_desc *sge = iod->meta_descriptor;
+ unsigned int attrs = 0;
if (iod->flags & IOD_SINGLE_META_SEGMENT) {
dma_unmap_page(dma_dev, iod->meta_dma,
@@ -744,13 +759,20 @@ static void nvme_unmap_metadata(struct request *req)
return;
}
- if (!blk_rq_integrity_dma_unmap(req, dma_dev, &iod->meta_dma_state,
- iod->meta_total_len)) {
+ if (iod->flags & IOD_META_P2P)
+ map = PCI_P2PDMA_MAP_BUS_ADDR;
+ else if (iod->flags & IOD_META_MMIO) {
+ map = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
+ attrs |= DMA_ATTR_MMIO;
+ }
+
+ if (!blk_rq_dma_unmap(req, dma_dev, &iod->meta_dma_state,
+ iod->meta_total_len, map)) {
if (nvme_pci_cmd_use_meta_sgl(&iod->cmd))
- nvme_free_sgls(req, sge, &sge[1]);
+ nvme_free_sgls(req, sge, &sge[1], attrs);
else
- dma_unmap_page(dma_dev, iod->meta_dma,
- iod->meta_total_len, dir);
+ dma_unmap_phys(dma_dev, iod->meta_dma,
+ iod->meta_total_len, dir, attrs);
}
if (iod->meta_descriptor)
@@ -760,9 +782,11 @@ static void nvme_unmap_metadata(struct request *req)
static void nvme_unmap_data(struct request *req)
{
+ enum pci_p2pdma_map_type map = PCI_P2PDMA_MAP_NONE;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
struct device *dma_dev = nvmeq->dev->dev;
+ unsigned int attrs = 0;
if (iod->flags & IOD_SINGLE_SEGMENT) {
static_assert(offsetof(union nvme_data_ptr, prp1) ==
@@ -772,12 +796,20 @@ static void nvme_unmap_data(struct request *req)
return;
}
- if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len)) {
+ if (iod->flags & IOD_DATA_P2P)
+ map = PCI_P2PDMA_MAP_BUS_ADDR;
+ else if (iod->flags & IOD_DATA_MMIO) {
+ map = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
+ attrs |= DMA_ATTR_MMIO;
+ }
+
+ if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len,
+ map)) {
if (nvme_pci_cmd_use_sgl(&iod->cmd))
nvme_free_sgls(req, iod->descriptors[0],
- &iod->cmd.common.dptr.sgl);
+ &iod->cmd.common.dptr.sgl, attrs);
else
- nvme_free_prps(req);
+ nvme_free_prps(req, attrs);
}
if (iod->nr_descriptors)
@@ -1048,6 +1080,19 @@ static blk_status_t nvme_map_data(struct request *req)
if (!blk_rq_dma_map_iter_start(req, dev->dev, &iod->dma_state, &iter))
return iter.status;
+ switch (iter.p2pdma.map) {
+ case PCI_P2PDMA_MAP_BUS_ADDR:
+ iod->flags |= IOD_DATA_P2P;
+ break;
+ case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+ iod->flags |= IOD_DATA_MMIO;
+ break;
+ case PCI_P2PDMA_MAP_NONE:
+ break;
+ default:
+ return BLK_STS_RESOURCE;
+ }
+
if (use_sgl == SGL_FORCED ||
(use_sgl == SGL_SUPPORTED &&
(sgl_threshold && nvme_pci_avg_seg_size(req) >= sgl_threshold)))
@@ -1070,6 +1115,19 @@ static blk_status_t nvme_pci_setup_meta_sgls(struct request *req)
&iod->meta_dma_state, &iter))
return iter.status;
+ switch (iter.p2pdma.map) {
+ case PCI_P2PDMA_MAP_BUS_ADDR:
+ iod->flags |= IOD_META_P2P;
+ break;
+ case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+ iod->flags |= IOD_META_MMIO;
+ break;
+ case PCI_P2PDMA_MAP_NONE:
+ break;
+ default:
+ return BLK_STS_RESOURCE;
+ }
+
if (blk_rq_dma_map_coalesce(&iod->meta_dma_state))
entries = 1;