summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorMark Brown <broonie@kernel.org>2025-12-23 15:18:22 +0000
committerMark Brown <broonie@kernel.org>2025-12-23 15:18:22 +0000
commitb005d618c8547b7dfb14e83a1b410a6a04ac36c6 (patch)
treec3a1af5c6b995be04256348dbfd6e6165dfc0805 /drivers
parentc81f30bde5b0449d9d82d31a66f0ffd608e610b5 (diff)
parent5bfbbf0a49ee4b5dcf46a3bfd4cd860d72cc887d (diff)
spi: cadence-quadspi: Prevent indirect read
Merge series from Mateusz Litwin <mateusz.litwin@nokia.com>: On the Stratix10 platform, indirect reads can become very slow due to lost interrupts and/or missed `complete()` calls, causing `wait_for_completion_timeout()` to expire. Three issues were identified: 1) A race condition exists between the read loop and the IRQ `complete()` call: an IRQ can call `complete()` after the inner loop ends but before `reinit_completion()`, losing the completion event and causing `wait_for_completion_timeout()` to expire. `cqspi_indirect_read_execute()` still does not return an error in this case, because `bytes_to_read` > 0 (indicating data is already in the FIFO) and the final `ret` value is overwritten by the `cqspi_wait_for_bit()` return value (indicating request completion), masking the timeout. For testing purposes, logging was added to print the count of timeouts and the outer loop count. $ dd if=/dev/mtd0 of=/dev/null bs=64M count=1 [ 2232.925219] cadence-qspi ff8d2000.spi: Indirect read error timeout (1) loop (12472) [ 2236.200391] cadence-qspi ff8d2000.spi: Indirect read error timeout (1) loop (12460) [ 2239.482836] cadence-qspi ff8d2000.spi: Indirect read error timeout (5) loop (12450) This indicates that such an event is rare, but possible. Tested on the Stratix10 platform. 2) The quirk assumes the indirect read path never leaves the inner loop on SoCFPGA. This assumption is incorrect when using slow flash. Disabling IRQs in the inner loop can cause lost interrupts. 3) The `CQSPI_SLOW_SRAM` quirk disables the `CQSPI_REG_IRQ_IND_COMP` (indirect completion) interrupt, relying solely on the `CQSPI_REG_IRQ_WATERMARK` (FIFO watermark) interrupt. For small transfer sizes, the final data read might not fill the FIFO sufficiently to trigger the watermark, preventing completion and leading to `wait_for_completion_timeout()` expiration. Two patches have been prepared to resolve these issues.
- [1/2] spi: cadence-quadspi: Prevent lost complete() call during indirect read Moving `reinit_completion()` before the inner loop prevents a race condition. This might cause a premature IRQ `complete()` call to occur; however, in the worst case, this will result in a spurious wakeup and another wait cycle, which is preferable to waiting for a timeout. - [2/2] spi: cadence-quadspi: Improve CQSPI_SLOW_SRAM quirk if flash is slow Re-enabling the `CQSPI_REG_IRQ_IND_COMP` interrupt resolves the problem for small reads, and no longer disabling interrupts in the read loop addresses the issue with lost interrupts. This marginally increases the IRQ count. Test: $ dd if=/dev/mtd0 of=/dev/null bs=1M count=64 Results from the Stratix10 platform with mt25qu02g flash. FIFO size in all tests: 128 Serviced interrupt call counts: Without `CQSPI_SLOW_SRAM` quirk: 16 668 850 With `CQSPI_SLOW_SRAM` quirk: 204 176 With `CQSPI_SLOW_SRAM` and this patch: 224 528 Patch 2/2: Delivers a substantial read‑performance improvement for the Cadence QSPI controller on the Stratix10 platform. Patch 1/2: Applies to all platforms and should yield a modest performance gain, most noticeable with large `CQSPI_READ_TIMEOUT_MS` values and workloads dominated by many small reads.
Diffstat (limited to 'drivers')
-rw-r--r--drivers/spi/spi-cadence-quadspi.c23
1 files changed, 11 insertions, 12 deletions
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index f8823e83a622..965b4cea3388 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -300,6 +300,9 @@ struct cqspi_driver_platdata {
CQSPI_REG_IRQ_IND_SRAM_FULL | \
CQSPI_REG_IRQ_IND_COMP)
+#define CQSPI_IRQ_MASK_RD_SLOW_SRAM (CQSPI_REG_IRQ_WATERMARK | \
+ CQSPI_REG_IRQ_IND_COMP)
+
#define CQSPI_IRQ_MASK_WR (CQSPI_REG_IRQ_IND_COMP | \
CQSPI_REG_IRQ_WATERMARK | \
CQSPI_REG_IRQ_UNDERFLOW)
@@ -381,7 +384,7 @@ static irqreturn_t cqspi_irq_handler(int this_irq, void *dev)
else if (!cqspi->slow_sram)
irq_status &= CQSPI_IRQ_MASK_RD | CQSPI_IRQ_MASK_WR;
else
- irq_status &= CQSPI_REG_IRQ_WATERMARK | CQSPI_IRQ_MASK_WR;
+ irq_status &= CQSPI_IRQ_MASK_RD_SLOW_SRAM | CQSPI_IRQ_MASK_WR;
if (irq_status)
complete(&cqspi->transfer_complete);
@@ -757,7 +760,7 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
*/
if (use_irq && cqspi->slow_sram)
- writel(CQSPI_REG_IRQ_WATERMARK, reg_base + CQSPI_REG_IRQMASK);
+ writel(CQSPI_IRQ_MASK_RD_SLOW_SRAM, reg_base + CQSPI_REG_IRQMASK);
else if (use_irq)
writel(CQSPI_IRQ_MASK_RD, reg_base + CQSPI_REG_IRQMASK);
else
@@ -769,17 +772,19 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
readl(reg_base + CQSPI_REG_INDIRECTRD); /* Flush posted write. */
while (remaining > 0) {
+ ret = 0;
if (use_irq &&
!wait_for_completion_timeout(&cqspi->transfer_complete,
msecs_to_jiffies(CQSPI_READ_TIMEOUT_MS)))
ret = -ETIMEDOUT;
/*
- * Disable all read interrupts until
- * we are out of "bytes to read"
+ * Prevent lost interrupt and race condition by reinitializing early.
+ * A spurious wakeup and another wait cycle can occur here,
+ * which is preferable to waiting until timeout if interrupt is lost.
*/
- if (cqspi->slow_sram)
- writel(0x0, reg_base + CQSPI_REG_IRQMASK);
+ if (use_irq)
+ reinit_completion(&cqspi->transfer_complete);
bytes_to_read = cqspi_get_rd_sram_level(cqspi);
@@ -811,12 +816,6 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
remaining -= bytes_to_read;
bytes_to_read = cqspi_get_rd_sram_level(cqspi);
}
-
- if (use_irq && remaining > 0) {
- reinit_completion(&cqspi->transfer_complete);
- if (cqspi->slow_sram)
- writel(CQSPI_REG_IRQ_WATERMARK, reg_base + CQSPI_REG_IRQMASK);
- }
}
/* Check indirect done status */