summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-03-26 08:22:07 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-03-26 08:22:07 -0700
commitdabb83ecf404c74a75469e7694a0b891e71f61b7 (patch)
treea53afeb2f6bed413ae94bc72512ee604d03463ca
parent0138af2472dfdef0d56fc4697416eaa0ff2589bd (diff)
parent2cdaff22ed26f1e619aa2b43f27bb84f2c6ef8f8 (diff)
Merge tag 'dma-mapping-7.0-2026-03-25' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux
Pull dma-mapping fixes from Marek Szyprowski: "A set of fixes for DMA-mapping subsystem, which resolve false- positive warnings from KMSAN and DMA-API debug (Shigeru Yoshida and Leon Romanovsky) as well as a simple build fix (Miguel Ojeda)" * tag 'dma-mapping-7.0-2026-03-25' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux: dma-mapping: add missing `inline` for `dma_free_attrs` mm/hmm: Indicate that HMM requires DMA coherency RDMA/umem: Tell DMA mapping that UMEM requires coherency iommu/dma: add support for DMA_ATTR_REQUIRE_COHERENT attribute dma-direct: prevent SWIOTLB path when DMA_ATTR_REQUIRE_COHERENT is set dma-mapping: Introduce DMA require coherency attribute dma-mapping: Clarify valid conditions for CPU cache line overlap dma-mapping: handle DMA_ATTR_CPU_CACHE_CLEAN in trace output dma-debug: Allow multiple invocations of overlapping entries dma: swiotlb: add KMSAN annotations to swiotlb_bounce()
-rw-r--r--Documentation/core-api/dma-attributes.rst36
-rw-r--r--drivers/infiniband/core/umem.c5
-rw-r--r--drivers/iommu/dma-iommu.c21
-rw-r--r--drivers/virtio/virtio_ring.c10
-rw-r--r--include/linux/dma-mapping.h19
-rw-r--r--include/trace/events/dma.h4
-rw-r--r--kernel/dma/debug.c9
-rw-r--r--kernel/dma/direct.h7
-rw-r--r--kernel/dma/mapping.c6
-rw-r--r--kernel/dma/swiotlb.c21
-rw-r--r--mm/hmm.c4
11 files changed, 106 insertions, 36 deletions
diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst
index 1d7bfad73b1c..123c8468d58f 100644
--- a/Documentation/core-api/dma-attributes.rst
+++ b/Documentation/core-api/dma-attributes.rst
@@ -149,11 +149,33 @@ For architectures that require cache flushing for DMA coherence
DMA_ATTR_MMIO will not perform any cache flushing. The address
provided must never be mapped cacheable into the CPU.
-DMA_ATTR_CPU_CACHE_CLEAN
-------------------------
+DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
+------------------------------------
-This attribute indicates the CPU will not dirty any cacheline overlapping this
-DMA_FROM_DEVICE/DMA_BIDIRECTIONAL buffer while it is mapped. This allows
-multiple small buffers to safely share a cacheline without risk of data
-corruption, suppressing DMA debug warnings about overlapping mappings.
-All mappings sharing a cacheline should have this attribute.
+This attribute indicates that CPU cache lines may overlap for buffers mapped
+with DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
+
+Such overlap may occur when callers map multiple small buffers that reside
+within the same cache line. In this case, callers must guarantee that the CPU
+will not dirty these cache lines after the mappings are established. When this
+condition is met, multiple buffers can safely share a cache line without risking
+data corruption.
+
+All mappings that share a cache line must set this attribute to suppress DMA
+debug warnings about overlapping mappings.
+
+DMA_ATTR_REQUIRE_COHERENT
+-------------------------
+
+DMA mapping requests with the DMA_ATTR_REQUIRE_COHERENT fail on any
+system where SWIOTLB or cache management is required. This should only
+be used to support uAPI designs that require continuous HW DMA
+coherence with userspace processes, for example RDMA and DRM. At a
+minimum the memory being mapped must be userspace memory from
+pin_user_pages() or similar.
+
+Drivers should consider using dma_mmap_pages() instead of this
+interface when building their uAPIs, when possible.
+
+It must never be used in an in-kernel driver that only works with
+kernel memory.
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index cff4fcca2c34..edc34c69f0f2 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -55,7 +55,8 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
if (dirty)
ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
- DMA_BIDIRECTIONAL, 0);
+ DMA_BIDIRECTIONAL,
+ DMA_ATTR_REQUIRE_COHERENT);
for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) {
unpin_user_page_range_dirty_lock(sg_page(sg),
@@ -169,7 +170,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
unsigned long lock_limit;
unsigned long new_pinned;
unsigned long cur_base;
- unsigned long dma_attr = 0;
+ unsigned long dma_attr = DMA_ATTR_REQUIRE_COHERENT;
struct mm_struct *mm;
unsigned long npages;
int pinned, ret;
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 5dac64be61bb..94d514169642 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1211,7 +1211,7 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
*/
if (dev_use_swiotlb(dev, size, dir) &&
iova_unaligned(iovad, phys, size)) {
- if (attrs & DMA_ATTR_MMIO)
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
return DMA_MAPPING_ERROR;
phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs);
@@ -1223,7 +1223,8 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
arch_sync_dma_for_device(phys, size, dir);
iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
- if (iova == DMA_MAPPING_ERROR && !(attrs & DMA_ATTR_MMIO))
+ if (iova == DMA_MAPPING_ERROR &&
+ !(attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)))
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
return iova;
}
@@ -1233,7 +1234,7 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle,
{
phys_addr_t phys;
- if (attrs & DMA_ATTR_MMIO) {
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) {
__iommu_dma_unmap(dev, dma_handle, size);
return;
}
@@ -1945,9 +1946,21 @@ int dma_iova_link(struct device *dev, struct dma_iova_state *state,
if (WARN_ON_ONCE(iova_start_pad && offset > 0))
return -EIO;
+ /*
+ * DMA_IOVA_USE_SWIOTLB is set on state after some entry
+ * took SWIOTLB path, which we were supposed to prevent
+ * for DMA_ATTR_REQUIRE_COHERENT attribute.
+ */
+ if (WARN_ON_ONCE((state->__size & DMA_IOVA_USE_SWIOTLB) &&
+ (attrs & DMA_ATTR_REQUIRE_COHERENT)))
+ return -EOPNOTSUPP;
+
+ if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+ return -EOPNOTSUPP;
+
if (dev_use_swiotlb(dev, size, dir) &&
iova_unaligned(iovad, phys, size)) {
- if (attrs & DMA_ATTR_MMIO)
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
return -EPERM;
return iommu_dma_iova_link_swiotlb(dev, state, phys, offset,
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 335692d41617..fbca7ce1c6bf 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2912,10 +2912,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
* @data: the token identifying the buffer.
* @gfp: how to do memory allocations (if necessary).
*
- * Same as virtqueue_add_inbuf but passes DMA_ATTR_CPU_CACHE_CLEAN to indicate
- * that the CPU will not dirty any cacheline overlapping this buffer while it
- * is available, and to suppress overlapping cacheline warnings in DMA debug
- * builds.
+ * Same as virtqueue_add_inbuf but passes DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
+ * to indicate that the CPU will not dirty any cacheline overlapping this buffer
+ * while it is available, and to suppress overlapping cacheline warnings in DMA
+ * debug builds.
*
* Caller must ensure we don't call this with other virtqueue operations
* at the same time (except where noted).
@@ -2928,7 +2928,7 @@ int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq,
gfp_t gfp)
{
return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp,
- DMA_ATTR_CPU_CACHE_CLEAN);
+ DMA_ATTR_DEBUGGING_IGNORE_CACHELINES);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 29973baa0581..99ef042ecdb4 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -80,11 +80,18 @@
#define DMA_ATTR_MMIO (1UL << 10)
/*
- * DMA_ATTR_CPU_CACHE_CLEAN: Indicates the CPU will not dirty any cacheline
- * overlapping this buffer while it is mapped for DMA. All mappings sharing
- * a cacheline must have this attribute for this to be considered safe.
+ * DMA_ATTR_DEBUGGING_IGNORE_CACHELINES: Indicates the CPU cache line can be
+ * overlapped. All mappings sharing a cacheline must have this attribute for
+ * this to be considered safe.
*/
-#define DMA_ATTR_CPU_CACHE_CLEAN (1UL << 11)
+#define DMA_ATTR_DEBUGGING_IGNORE_CACHELINES (1UL << 11)
+
+/*
+ * DMA_ATTR_REQUIRE_COHERENT: Indicates that DMA coherency is required.
+ * All mappings that carry this attribute can't work with SWIOTLB and cache
+ * flushing.
+ */
+#define DMA_ATTR_REQUIRE_COHERENT (1UL << 12)
/*
* A dma_addr_t can hold any valid DMA or bus address for the platform. It can
@@ -248,8 +255,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
{
return NULL;
}
-static void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t dma_handle, unsigned long attrs)
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
{
}
static inline void *dmam_alloc_attrs(struct device *dev, size_t size,
diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h
index 33e99e792f1a..63597b004424 100644
--- a/include/trace/events/dma.h
+++ b/include/trace/events/dma.h
@@ -32,7 +32,9 @@ TRACE_DEFINE_ENUM(DMA_NONE);
{ DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \
{ DMA_ATTR_NO_WARN, "NO_WARN" }, \
{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \
- { DMA_ATTR_MMIO, "MMIO" })
+ { DMA_ATTR_MMIO, "MMIO" }, \
+ { DMA_ATTR_DEBUGGING_IGNORE_CACHELINES, "CACHELINES_OVERLAP" }, \
+ { DMA_ATTR_REQUIRE_COHERENT, "REQUIRE_COHERENT" })
DECLARE_EVENT_CLASS(dma_map,
TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 86f87e43438c..0677918f06a8 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -453,7 +453,7 @@ static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
return overlap;
}
-static void active_cacheline_inc_overlap(phys_addr_t cln)
+static void active_cacheline_inc_overlap(phys_addr_t cln, bool is_cache_clean)
{
int overlap = active_cacheline_read_overlap(cln);
@@ -462,7 +462,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln)
/* If we overflowed the overlap counter then we're potentially
* leaking dma-mappings.
*/
- WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
+ WARN_ONCE(!is_cache_clean && overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"),
ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
}
@@ -495,7 +495,7 @@ static int active_cacheline_insert(struct dma_debug_entry *entry,
if (rc == -EEXIST) {
struct dma_debug_entry *existing;
- active_cacheline_inc_overlap(cln);
+ active_cacheline_inc_overlap(cln, entry->is_cache_clean);
existing = radix_tree_lookup(&dma_active_cacheline, cln);
/* A lookup failure here after we got -EEXIST is unexpected. */
WARN_ON(!existing);
@@ -601,7 +601,8 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs)
unsigned long flags;
int rc;
- entry->is_cache_clean = !!(attrs & DMA_ATTR_CPU_CACHE_CLEAN);
+ entry->is_cache_clean = attrs & (DMA_ATTR_DEBUGGING_IGNORE_CACHELINES |
+ DMA_ATTR_REQUIRE_COHERENT);
bucket = get_hash_bucket(entry, &flags);
hash_bucket_add(bucket, entry);
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index e89f175e9c2d..6184ff303f08 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -84,7 +84,7 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
dma_addr_t dma_addr;
if (is_swiotlb_force_bounce(dev)) {
- if (attrs & DMA_ATTR_MMIO)
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
return DMA_MAPPING_ERROR;
return swiotlb_map(dev, phys, size, dir, attrs);
@@ -98,7 +98,8 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
dma_addr = phys_to_dma(dev, phys);
if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
dma_kmalloc_needs_bounce(dev, size, dir)) {
- if (is_swiotlb_active(dev))
+ if (is_swiotlb_active(dev) &&
+ !(attrs & DMA_ATTR_REQUIRE_COHERENT))
return swiotlb_map(dev, phys, size, dir, attrs);
goto err_overflow;
@@ -123,7 +124,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
{
phys_addr_t phys;
- if (attrs & DMA_ATTR_MMIO)
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
/* nothing to do: uncached and no swiotlb */
return;
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 3928a509c44c..6d3dd0bd3a88 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -164,6 +164,9 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
if (WARN_ON_ONCE(!dev->dma_mask))
return DMA_MAPPING_ERROR;
+ if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+ return DMA_MAPPING_ERROR;
+
if (dma_map_direct(dev, ops) ||
(!is_mmio && arch_dma_map_phys_direct(dev, phys + size)))
addr = dma_direct_map_phys(dev, phys, size, dir, attrs);
@@ -235,6 +238,9 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
BUG_ON(!valid_dma_direction(dir));
+ if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+ return -EOPNOTSUPP;
+
if (WARN_ON_ONCE(!dev->dma_mask))
return 0;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d8e6f1d889d5..9fd73700ddcf 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -30,6 +30,7 @@
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/io.h>
+#include <linux/kmsan-checks.h>
#include <linux/iommu-helper.h>
#include <linux/init.h>
#include <linux/memblock.h>
@@ -901,10 +902,19 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
local_irq_save(flags);
page = pfn_to_page(pfn);
- if (dir == DMA_TO_DEVICE)
+ if (dir == DMA_TO_DEVICE) {
+ /*
+ * Ideally, kmsan_check_highmem_page()
+ * could be used here to detect infoleaks,
+ * but callers may map uninitialized buffers
+ * that will be written by the device,
+ * causing false positives.
+ */
memcpy_from_page(vaddr, page, offset, sz);
- else
+ } else {
+ kmsan_unpoison_memory(vaddr, sz);
memcpy_to_page(page, offset, vaddr, sz);
+ }
local_irq_restore(flags);
size -= sz;
@@ -913,8 +923,15 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
offset = 0;
}
} else if (dir == DMA_TO_DEVICE) {
+ /*
+ * Ideally, kmsan_check_memory() could be used here to detect
+ * infoleaks (uninitialized data being sent to device), but
+ * callers may map uninitialized buffers that will be written
+ * by the device, causing false positives.
+ */
memcpy(vaddr, phys_to_virt(orig_addr), size);
} else {
+ kmsan_unpoison_memory(vaddr, size);
memcpy(phys_to_virt(orig_addr), vaddr, size);
}
}
diff --git a/mm/hmm.c b/mm/hmm.c
index f6c4ddff4bd6..5955f2f0c83d 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -778,7 +778,7 @@ dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map,
struct page *page = hmm_pfn_to_page(pfns[idx]);
phys_addr_t paddr = hmm_pfn_to_phys(pfns[idx]);
size_t offset = idx * map->dma_entry_size;
- unsigned long attrs = 0;
+ unsigned long attrs = DMA_ATTR_REQUIRE_COHERENT;
dma_addr_t dma_addr;
int ret;
@@ -871,7 +871,7 @@ bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx)
struct dma_iova_state *state = &map->state;
dma_addr_t *dma_addrs = map->dma_list;
unsigned long *pfns = map->pfn_list;
- unsigned long attrs = 0;
+ unsigned long attrs = DMA_ATTR_REQUIRE_COHERENT;
if ((pfns[idx] & valid_dma) != valid_dma)
return false;