diff options
| author | Jens Axboe <axboe@kernel.dk> | 2025-11-14 05:10:09 -0700 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2025-11-14 05:10:09 -0700 |
| commit | fa0d2dc69e945ddbb1d023dfb4f7ba4103523213 (patch) | |
| tree | 4512b28a5e01e2d581381a7f75fce083be3e906b /drivers | |
| parent | 8e1bf774ab18157cb8041628f2661aa12e425914 (diff) | |
| parent | 37f0c7a8df7ad719a68fa1c2dbf066cfebc391a7 (diff) | |
Merge branch 'p2pdma-mmio-6.19.v5' into for-6.19/block
Merge MMIO P2P DMA series from Leon:
"This patch series improves block layer and NVMe driver support for MMIO
memory regions, particularly for peer-to-peer (P2P) DMA transfers that
go through the host bridge.
The series addresses a critical gap where P2P transfers through the
host bridge (PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) were not properly marked
as MMIO memory, leading to potential issues with:
- Inappropriate CPU cache synchronization operations on MMIO regions
- Incorrect DMA mapping/unmapping that doesn't respect MMIO semantics
- Missing IOMMU configuration for MMIO memory handling
This work is extracted from the larger DMA physical API improvement
series [1] and focuses specifically on block layer and NVMe
requirements for MMIO memory support.
[1] https://lore.kernel.org/all/cover.1757423202.git.leonro@nvidia.com/"
Link: https://lore.kernel.org/linux-block/20251114-block-with-mmio-v5-0-69d00f73d766@nvidia.com/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
* p2pdma-mmio-6.19.v5:
block-dma: properly take MMIO path
nvme-pci: migrate to dma_map_phys instead of map_page
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/nvme/host/pci.c | 90 |
1 files changed, 74 insertions, 16 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3c1727df1e36..9085bed107fd 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -260,8 +260,20 @@ enum nvme_iod_flags { /* single segment dma mapping */ IOD_SINGLE_SEGMENT = 1U << 2, + /* Data payload contains p2p memory */ + IOD_DATA_P2P = 1U << 3, + + /* Metadata contains p2p memory */ + IOD_META_P2P = 1U << 4, + + /* Data payload contains MMIO memory */ + IOD_DATA_MMIO = 1U << 5, + + /* Metadata contains MMIO memory */ + IOD_META_MMIO = 1U << 6, + /* Metadata using non-coalesced MPTR */ - IOD_SINGLE_META_SEGMENT = 1U << 5, + IOD_SINGLE_META_SEGMENT = 1U << 7, }; struct nvme_dma_vec { @@ -698,20 +710,20 @@ static void nvme_free_descriptors(struct request *req) } } -static void nvme_free_prps(struct request *req) +static void nvme_free_prps(struct request *req, unsigned int attrs) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = req->mq_hctx->driver_data; unsigned int i; for (i = 0; i < iod->nr_dma_vecs; i++) - dma_unmap_page(nvmeq->dev->dev, iod->dma_vecs[i].addr, - iod->dma_vecs[i].len, rq_dma_dir(req)); + dma_unmap_phys(nvmeq->dev->dev, iod->dma_vecs[i].addr, + iod->dma_vecs[i].len, rq_dma_dir(req), attrs); mempool_free(iod->dma_vecs, nvmeq->dev->dmavec_mempool); } static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge, - struct nvme_sgl_desc *sg_list) + struct nvme_sgl_desc *sg_list, unsigned int attrs) { struct nvme_queue *nvmeq = req->mq_hctx->driver_data; enum dma_data_direction dir = rq_dma_dir(req); @@ -720,22 +732,25 @@ static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge, unsigned int i; if (sge->type == (NVME_SGL_FMT_DATA_DESC << 4)) { - dma_unmap_page(dma_dev, le64_to_cpu(sge->addr), len, dir); + dma_unmap_phys(dma_dev, le64_to_cpu(sge->addr), len, dir, + attrs); return; } for (i = 0; i < len / sizeof(*sg_list); i++) - dma_unmap_page(dma_dev, le64_to_cpu(sg_list[i].addr), - le32_to_cpu(sg_list[i].length), dir); + dma_unmap_phys(dma_dev, le64_to_cpu(sg_list[i].addr), + le32_to_cpu(sg_list[i].length), dir, attrs); } static void nvme_unmap_metadata(struct request *req) { struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + enum pci_p2pdma_map_type map = PCI_P2PDMA_MAP_NONE; enum dma_data_direction dir = rq_dma_dir(req); struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct device *dma_dev = nvmeq->dev->dev; struct nvme_sgl_desc *sge = iod->meta_descriptor; + unsigned int attrs = 0; if (iod->flags & IOD_SINGLE_META_SEGMENT) { dma_unmap_page(dma_dev, iod->meta_dma, @@ -744,13 +759,20 @@ static void nvme_unmap_metadata(struct request *req) return; } - if (!blk_rq_integrity_dma_unmap(req, dma_dev, &iod->meta_dma_state, - iod->meta_total_len)) { + if (iod->flags & IOD_META_P2P) + map = PCI_P2PDMA_MAP_BUS_ADDR; + else if (iod->flags & IOD_META_MMIO) { + map = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; + attrs |= DMA_ATTR_MMIO; + } + + if (!blk_rq_dma_unmap(req, dma_dev, &iod->meta_dma_state, + iod->meta_total_len, map)) { if (nvme_pci_cmd_use_meta_sgl(&iod->cmd)) - nvme_free_sgls(req, sge, &sge[1]); + nvme_free_sgls(req, sge, &sge[1], attrs); else - dma_unmap_page(dma_dev, iod->meta_dma, - iod->meta_total_len, dir); + dma_unmap_phys(dma_dev, iod->meta_dma, + iod->meta_total_len, dir, attrs); } if (iod->meta_descriptor) @@ -760,9 +782,11 @@ static void nvme_unmap_metadata(struct request *req) static void nvme_unmap_data(struct request *req) { + enum pci_p2pdma_map_type map = PCI_P2PDMA_MAP_NONE; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = req->mq_hctx->driver_data; struct device *dma_dev = nvmeq->dev->dev; + unsigned int attrs = 0; if (iod->flags & IOD_SINGLE_SEGMENT) { static_assert(offsetof(union nvme_data_ptr, prp1) == @@ -772,12 +796,20 @@ static void nvme_unmap_data(struct request *req) return; } - if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len)) { + if (iod->flags & IOD_DATA_P2P) + map = PCI_P2PDMA_MAP_BUS_ADDR; + else if (iod->flags & IOD_DATA_MMIO) { + map = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; + attrs |= DMA_ATTR_MMIO; + } + + if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len, + map)) { if (nvme_pci_cmd_use_sgl(&iod->cmd)) nvme_free_sgls(req, iod->descriptors[0], - &iod->cmd.common.dptr.sgl); + &iod->cmd.common.dptr.sgl, attrs); else - nvme_free_prps(req); + nvme_free_prps(req, attrs); } if (iod->nr_descriptors) @@ -1048,6 +1080,19 @@ static blk_status_t nvme_map_data(struct request *req) if (!blk_rq_dma_map_iter_start(req, dev->dev, &iod->dma_state, &iter)) return iter.status; + switch (iter.p2pdma.map) { + case PCI_P2PDMA_MAP_BUS_ADDR: + iod->flags |= IOD_DATA_P2P; + break; + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + iod->flags |= IOD_DATA_MMIO; + break; + case PCI_P2PDMA_MAP_NONE: + break; + default: + return BLK_STS_RESOURCE; + } + if (use_sgl == SGL_FORCED || (use_sgl == SGL_SUPPORTED && (sgl_threshold && nvme_pci_avg_seg_size(req) >= sgl_threshold))) @@ -1070,6 +1115,19 @@ static blk_status_t nvme_pci_setup_meta_sgls(struct request *req) &iod->meta_dma_state, &iter)) return iter.status; + switch (iter.p2pdma.map) { + case PCI_P2PDMA_MAP_BUS_ADDR: + iod->flags |= IOD_META_P2P; + break; + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + iod->flags |= IOD_META_MMIO; + break; + case PCI_P2PDMA_MAP_NONE: + break; + default: + return BLK_STS_RESOURCE; + } + if (blk_rq_dma_map_coalesce(&iod->meta_dma_state)) entries = 1; |
