summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ports/stm32/dma.c53
-rw-r--r--ports/stm32/dma.h38
-rw-r--r--ports/stm32/mpu.h16
-rw-r--r--ports/stm32/spi.c8
-rw-r--r--tests/ports/stm32_hardware/dma_alignment.py43
-rw-r--r--tests/ports/stm32_hardware/dma_alignment.py.exp2
6 files changed, 158 insertions, 2 deletions
diff --git a/ports/stm32/dma.c b/ports/stm32/dma.c
index af56d8123..df8a408cb 100644
--- a/ports/stm32/dma.c
+++ b/ports/stm32/dma.c
@@ -33,6 +33,7 @@
#include "systick.h"
#include "dma.h"
#include "irq.h"
+#include "mpu.h"
// When this option is enabled, the DMA will turn off automatically after
// a period of inactivity.
@@ -1852,3 +1853,55 @@ void dma_external_acquire(uint32_t controller, uint32_t stream) {
void dma_external_release(uint32_t controller, uint32_t stream) {
dma_disable_clock(DMA_ID_FROM_CONTROLLER_STREAM(controller, stream));
}
+
+#if __DCACHE_PRESENT
+
+void dma_protect_rx_region(void *dest, size_t len) {
+ #if __DCACHE_PRESENT
+ uint32_t start_addr = (uint32_t)dest;
+ uint32_t start_aligned = start_addr & ~(__SCB_DCACHE_LINE_SIZE - 1U);
+ uint32_t end_addr = start_addr + len - 1; // Address of last byte in the buffer
+ uint32_t end_aligned = end_addr & ~(__SCB_DCACHE_LINE_SIZE - 1U);
+
+ uint32_t irq_state = mpu_config_start();
+
+ // Clean (write back) any cached memory in this region, so there's no dirty
+ // cache entries that might be written back later after DMA RX is done.
+ MP_HAL_CLEAN_DCACHE(dest, len);
+
+ // The way we protect the whole region is to mark the first and last cache
+ // line as UNCACHED using the MPU. This means any unrelated reads/writes in
+ // these cache lines will bypass the cache, and can coexist with DMA also
+ // writing to parts of these cache lines.
+ //
+ // This is redundant sometimes (because the DMA region fills the entire cache line, or because
+ // the region fits in a single cache line.) However, the implementation is only 3 register writes so
+    // it's more efficient to call it every time. (NOTE(review): MPU_REGION_SIZE_32B assumes __SCB_DCACHE_LINE_SIZE == 32 — confirm for this core.)
+ mpu_config_region(MPU_REGION_DMA_UNCACHED_1, start_aligned, MPU_CONFIG_UNCACHED(MPU_REGION_SIZE_32B));
+ mpu_config_region(MPU_REGION_DMA_UNCACHED_2, end_aligned, MPU_CONFIG_UNCACHED(MPU_REGION_SIZE_32B));
+
+ mpu_config_end(irq_state);
+ #endif
+}
+
+void dma_unprotect_rx_region(void *dest, size_t len) {
+ #if __DCACHE_PRESENT
+ uint32_t irq_state = mpu_config_start();
+
+ // Disabling these regions removes them from the MPU
+ mpu_config_region(MPU_REGION_DMA_UNCACHED_1, 0, MPU_CONFIG_DISABLE);
+ mpu_config_region(MPU_REGION_DMA_UNCACHED_2, 0, MPU_CONFIG_DISABLE);
+
+ // Invalidate the whole region in the cache. This may seem redundant, but it
+ // is possible that during the DMA operation the CPU read inside this region
+ // (excluding the first & last cache lines), and cache lines were filled.
+ //
+ // (This can happen in SPI if src==dest, for example, possibly due to speculative
+ // cache line fills.)
+ MP_HAL_CLEANINVALIDATE_DCACHE(dest, len);
+
+ mpu_config_end(irq_state);
+ #endif
+}
+
+#endif
diff --git a/ports/stm32/dma.h b/ports/stm32/dma.h
index fa3238413..7ba04baf5 100644
--- a/ports/stm32/dma.h
+++ b/ports/stm32/dma.h
@@ -26,6 +26,8 @@
#ifndef MICROPY_INCLUDED_STM32_DMA_H
#define MICROPY_INCLUDED_STM32_DMA_H
+#include "py/mpconfig.h"
+
typedef struct _dma_descr_t dma_descr_t;
#if defined(STM32H5)
@@ -164,4 +166,40 @@ void dma_nohal_start(const dma_descr_t *descr, uint32_t src_addr, uint32_t dst_a
void dma_external_acquire(uint32_t controller, uint32_t stream);
void dma_external_release(uint32_t controller, uint32_t stream);
+#if __DCACHE_PRESENT
+// On chips with D-Cache, it's necessary to call this function before using DMA to read
+// into a user-supplied buffer. It does two things:
+//
+// 1) Cleans this region in the cache.
+//
+// 2) Temporarily disables caching for the first and last cache lines of the
+// buffer. This prevents cache coherency issues if the start or end of the
+// buffer don't align to a cache line. (For example, this can happen if the CPU
+// accesses unrelated memory adjacent to the buffer and in the same cache
+// line.)
+//
+// Only one region can be protected in this way at a time.
+void dma_protect_rx_region(void *dest, size_t len);
+
+// Release the region protected by dma_protect_rx_region().
+//
+// Call this after the DMA operation completes, before the CPU reads any of the
+// data that was written.
+//
+// As well as restoring data caching, this function invalidates D-cache for the
+// entire specified region.
+void dma_unprotect_rx_region(void *dest, size_t len);
+
+#else
+
+inline static void dma_protect_rx_region(void *dest, size_t len) {
+ // No-ops on targets without D-Cache.
+}
+
+inline static void dma_unprotect_rx_region(void *dest, size_t len) {
+    // No-op on targets without D-Cache.
+}
+
+#endif // __DCACHE_PRESENT
+
#endif // MICROPY_INCLUDED_STM32_DMA_H
diff --git a/ports/stm32/mpu.h b/ports/stm32/mpu.h
index 62a428198..64880a85d 100644
--- a/ports/stm32/mpu.h
+++ b/ports/stm32/mpu.h
@@ -37,6 +37,10 @@
#define MPU_REGION_SDRAM1 (MPU_REGION_NUMBER4)
#define MPU_REGION_SDRAM2 (MPU_REGION_NUMBER5)
+// Only relevant on CPUs with D-Cache. These must be higher priority (i.e. a higher region number) than the SDRAM regions.
+#define MPU_REGION_DMA_UNCACHED_1 (MPU_REGION_NUMBER6)
+#define MPU_REGION_DMA_UNCACHED_2 (MPU_REGION_NUMBER7)
+
// Attribute value to disable a region entirely, remove it from the MPU
// (i.e. the MPU_REGION_ENABLE bit is unset.)
#define MPU_CONFIG_DISABLE 0
@@ -78,6 +82,18 @@
| MPU_REGION_ENABLE << MPU_RASR_ENABLE_Pos \
)
+#define MPU_CONFIG_UNCACHED(size) ( \
+ MPU_INSTRUCTION_ACCESS_DISABLE << MPU_RASR_XN_Pos \
+ | MPU_REGION_FULL_ACCESS << MPU_RASR_AP_Pos \
+ | MPU_TEX_LEVEL1 << MPU_RASR_TEX_Pos \
+ | MPU_ACCESS_NOT_SHAREABLE << MPU_RASR_S_Pos \
+ | MPU_ACCESS_NOT_CACHEABLE << MPU_RASR_C_Pos \
+ | MPU_ACCESS_NOT_BUFFERABLE << MPU_RASR_B_Pos \
+ | 0x00 << MPU_RASR_SRD_Pos \
+ | (size) << MPU_RASR_SIZE_Pos \
+ | MPU_REGION_ENABLE << MPU_RASR_ENABLE_Pos \
+ )
+
static inline void mpu_init(void) {
MPU->CTRL = MPU_PRIVILEGED_DEFAULT | MPU_CTRL_ENABLE_Msk;
SCB->SHCSR |= SCB_SHCSR_MEMFAULTENA_Msk;
diff --git a/ports/stm32/spi.c b/ports/stm32/spi.c
index 5aace8e9a..607b3fe68 100644
--- a/ports/stm32/spi.c
+++ b/ports/stm32/spi.c
@@ -571,6 +571,8 @@ void spi_transfer(const spi_t *self, size_t len, const uint8_t *src, uint8_t *de
// Note: DMA transfers are limited to 65535 bytes at a time.
HAL_StatusTypeDef status;
+ void *odest = dest; // Original values of dest & len
+ size_t olen = len;
if (dest == NULL) {
// send only
@@ -613,7 +615,7 @@ void spi_transfer(const spi_t *self, size_t len, const uint8_t *src, uint8_t *de
}
dma_init(&rx_dma, self->rx_dma_descr, DMA_PERIPH_TO_MEMORY, self->spi);
self->spi->hdmarx = &rx_dma;
- MP_HAL_CLEANINVALIDATE_DCACHE(dest, len);
+ dma_protect_rx_region(dest, len);
uint32_t t_start = HAL_GetTick();
do {
uint32_t l = MIN(len, 65535);
@@ -632,6 +634,7 @@ void spi_transfer(const spi_t *self, size_t len, const uint8_t *src, uint8_t *de
dma_deinit(self->tx_dma_descr);
}
dma_deinit(self->rx_dma_descr);
+ dma_unprotect_rx_region(odest, olen);
}
} else {
// send and receive
@@ -644,7 +647,7 @@ void spi_transfer(const spi_t *self, size_t len, const uint8_t *src, uint8_t *de
dma_init(&rx_dma, self->rx_dma_descr, DMA_PERIPH_TO_MEMORY, self->spi);
self->spi->hdmarx = &rx_dma;
MP_HAL_CLEAN_DCACHE(src, len);
- MP_HAL_CLEANINVALIDATE_DCACHE(dest, len);
+ dma_protect_rx_region(dest, len);
uint32_t t_start = HAL_GetTick();
do {
uint32_t l = MIN(len, 65535);
@@ -662,6 +665,7 @@ void spi_transfer(const spi_t *self, size_t len, const uint8_t *src, uint8_t *de
} while (len);
dma_deinit(self->tx_dma_descr);
dma_deinit(self->rx_dma_descr);
+ dma_unprotect_rx_region(odest, olen);
}
}
diff --git a/tests/ports/stm32_hardware/dma_alignment.py b/tests/ports/stm32_hardware/dma_alignment.py
new file mode 100644
index 000000000..1836b25d8
--- /dev/null
+++ b/tests/ports/stm32_hardware/dma_alignment.py
@@ -0,0 +1,43 @@
+from machine import SPI
+# Regression test for DCache coherency bugs in DMA RX when buffers are not
+# aligned to cache lines. Originally written for https://github.com/micropython/micropython/issues/13471
+
+# IMPORTANT: This test requires SPI2 MISO (pin Y8 on Pyboard D) to be connected to GND
+
+SPI_NUM = 2
+
+spi = SPI(SPI_NUM, baudrate=5_000_000)
+buf = bytearray(1024)
+ok = True
+
+for offs in range(0, len(buf)):
+ v = memoryview(buf)[offs : offs + 128]
+ spi.readinto(v, 0xFF)
+ if not all(b == 0x00 for b in v):
+ print(offs, v.hex())
+ ok = False
+
+print("Variable offset fixed length " + ("OK" if ok else "FAIL"))
+
+# This stage takes around 30 seconds to run, so it is skipped if the first stage already failed.
+if ok:
+ for op_len in range(1, 66):
+ wr = b"\xFF" * op_len
+ for offs in range(1, len(buf) - op_len - 1):
+ # Place some "sentinel" values before and after the DMA buffer
+ before = offs & 0xFF
+ after = (~offs) & 0xFF
+ buf[offs - 1] = before
+ buf[offs + op_len] = after
+ v = memoryview(buf)[offs : offs + op_len]
+ spi.write_readinto(wr, v)
+ if (
+ not all(b == 0x00 for b in v)
+ or buf[offs - 1] != before
+ or buf[offs + op_len] != after
+ ):
+ print(v.hex())
+ print(hex(op_len), hex(offs), hex(buf[offs - 1]), hex(buf[offs + op_len]))
+ ok = False
+
+ print("Variable offset and lengths " + ("OK" if ok else "FAIL"))
diff --git a/tests/ports/stm32_hardware/dma_alignment.py.exp b/tests/ports/stm32_hardware/dma_alignment.py.exp
new file mode 100644
index 000000000..e890e0081
--- /dev/null
+++ b/tests/ports/stm32_hardware/dma_alignment.py.exp
@@ -0,0 +1,2 @@
+Variable offset fixed length OK
+Variable offset and lengths OK