From 9e541b3cee70a3bbe86b176c903c23b29fe033cd Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Wed, 10 Dec 2025 21:29:05 +0800 Subject: PCI: trace: Add generic RAS tracepoint for hotplug event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hotplug events are critical indicators for analyzing hardware health, and surprise link downs can significantly impact system performance and reliability. Define a new TRACING_SYSTEM named "pci", add a generic RAS tracepoint for hotplug event to help health checks. Add enum pci_hotplug_event in include/uapi/linux/pci.h so applications like rasdaemon can register tracepoint event handlers for it. The following output is generated when a device is hotplugged: $ echo 1 > /sys/kernel/debug/tracing/events/pci/pci_hp_event/enable $ cat /sys/kernel/debug/tracing/trace_pipe irq/51-pciehp-88 [001] ..... 1311.177459: pci_hp_event: 0000:00:02.0 slot:10, event:CARD_PRESENT irq/51-pciehp-88 [001] ..... 1311.177566: pci_hp_event: 0000:00:02.0 slot:10, event:LINK_UP Suggested-by: Lukas Wunner Signed-off-by: Shuai Xue Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Reviewed-by: Jonathan Cameron Reviewed-by: Steven Rostedt (Google) # for trace event Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20251210132907.58799-2-xueshuai@linux.alibaba.com --- include/uapi/linux/pci.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci.h b/include/uapi/linux/pci.h index a769eefc5139..4f150028965d 100644 --- a/include/uapi/linux/pci.h +++ b/include/uapi/linux/pci.h @@ -39,4 +39,11 @@ #define PCIIOC_MMAP_IS_MEM (PCIIOC_BASE | 0x02) /* Set mmap state to MEM space. */ #define PCIIOC_WRITE_COMBINE (PCIIOC_BASE | 0x03) /* Enable/disable write-combining. 
*/ +enum pci_hotplug_event { + PCI_HOTPLUG_LINK_UP, + PCI_HOTPLUG_LINK_DOWN, + PCI_HOTPLUG_CARD_PRESENT, + PCI_HOTPLUG_CARD_NOT_PRESENT, +}; + #endif /* _UAPILINUX_PCI_H */ -- cgit v1.2.3 From cad3337bb6c3a2ba2307d6a9061e752e15681d2b Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 19 Dec 2025 19:40:33 +0200 Subject: PCI: Add dword #defines for Bus Number + Secondary Latency Timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit uapi/linux/pci_regs.h defines Primary/Secondary/Subordinate Bus Numbers and Secondary Latency Timer (PCIe r7.0, sec. 7.5.1.3) as byte register offsets, but in practice the code may read/write the entire dword. In the absence of #defines to handle the dword fields, the code ends up using literals which are not as easy to read. Add dword field masks for the Bus Number and Secondary Latency Timer fields and use them in probe.c. Signed-off-by: Ilpo Järvinen [bhelgaas: squash new #defines and uses together] Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20251219174036.16738-21-ilpo.jarvinen@linux.intel.com Link: https://patch.msgid.link/20251219174036.16738-22-ilpo.jarvinen@linux.intel.com --- drivers/pci/probe.c | 25 +++++++++++++------------ include/uapi/linux/pci_regs.h | 5 +++++ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ed4d26833640..53ec1879fb99 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -524,8 +524,8 @@ static void pci_read_bridge_windows(struct pci_dev *bridge) pci_read_config_dword(bridge, PCI_PRIMARY_BUS, &buses); res.flags = IORESOURCE_BUS; - res.start = (buses >> 8) & 0xff; - res.end = (buses >> 16) & 0xff; + res.start = FIELD_GET(PCI_SECONDARY_BUS_MASK, buses); + res.end = FIELD_GET(PCI_SUBORDINATE_BUS_MASK, buses); pci_info(bridge, "PCI bridge to %pR%s\n", &res, bridge->transparent ? 
" (subtractive decode)" : ""); @@ -1393,9 +1393,9 @@ static int pci_scan_bridge_extend(struct pci_bus *bus, struct pci_dev *dev, pm_runtime_get_sync(&dev->dev); pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses); - primary = buses & 0xFF; - secondary = (buses >> 8) & 0xFF; - subordinate = (buses >> 16) & 0xFF; + primary = FIELD_GET(PCI_PRIMARY_BUS_MASK, buses); + secondary = FIELD_GET(PCI_SECONDARY_BUS_MASK, buses); + subordinate = FIELD_GET(PCI_SUBORDINATE_BUS_MASK, buses); pci_dbg(dev, "scanning [bus %02x-%02x] behind bridge, pass %d\n", secondary, subordinate, pass); @@ -1476,7 +1476,7 @@ static int pci_scan_bridge_extend(struct pci_bus *bus, struct pci_dev *dev, * ranges. */ pci_write_config_dword(dev, PCI_PRIMARY_BUS, - buses & ~0xffffff); + buses & PCI_SEC_LATENCY_TIMER_MASK); goto out; } @@ -1507,18 +1507,19 @@ static int pci_scan_bridge_extend(struct pci_bus *bus, struct pci_dev *dev, if (available_buses) available_buses--; - buses = (buses & 0xff000000) - | ((unsigned int)(child->primary) << 0) - | ((unsigned int)(child->busn_res.start) << 8) - | ((unsigned int)(child->busn_res.end) << 16); + buses = (buses & PCI_SEC_LATENCY_TIMER_MASK) | + FIELD_PREP(PCI_PRIMARY_BUS_MASK, child->primary) | + FIELD_PREP(PCI_SECONDARY_BUS_MASK, child->busn_res.start) | + FIELD_PREP(PCI_SUBORDINATE_BUS_MASK, child->busn_res.end); /* * yenta.c forces a secondary latency timer of 176. * Copy that behaviour here. 
*/ if (is_cardbus) { - buses &= ~0xff000000; - buses |= CARDBUS_LATENCY_TIMER << 24; + buses &= ~PCI_SEC_LATENCY_TIMER_MASK; + buses |= FIELD_PREP(PCI_SEC_LATENCY_TIMER_MASK, + CARDBUS_LATENCY_TIMER); } /* We need to blast all three values with a single write */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 3add74ae2594..8be55ece2a21 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -132,6 +132,11 @@ #define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */ #define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */ #define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */ +/* Masks for dword-sized processing of Bus Number and Sec Latency Timer fields */ +#define PCI_PRIMARY_BUS_MASK 0x000000ff +#define PCI_SECONDARY_BUS_MASK 0x0000ff00 +#define PCI_SUBORDINATE_BUS_MASK 0x00ff0000 +#define PCI_SEC_LATENCY_TIMER_MASK 0xff000000 #define PCI_IO_BASE 0x1c /* I/O range behind the bridge */ #define PCI_IO_LIMIT 0x1d #define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */ -- cgit v1.2.3 From 8cf82bb558517503a81f8e3c49914c0836360aa6 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Sat, 24 Jan 2026 23:50:11 +0900 Subject: misc: pci_endpoint_test: Add BAR subrange mapping test case Add a new PCITEST_BAR_SUBRANGE ioctl to exercise EPC BAR subrange mapping end-to-end. The test programs a simple 2-subrange layout on the endpoint (via pci-epf-test) and verifies that: - the endpoint-provided per-subrange signature bytes are observed at the expected PCIe BAR offsets, and - writes to each subrange are routed to the correct backing region (i.e. the submap order is applied rather than accidentally working due to an identity mapping). Return -EOPNOTSUPP when the endpoint does not advertise subrange mapping, -ENODATA when the BAR is disabled, and -EBUSY when the BAR is reserved for the test register space. 
Signed-off-by: Koichiro Den Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20260124145012.2794108-8-den@valinux.co.jp --- drivers/misc/pci_endpoint_test.c | 203 ++++++++++++++++++++++++++++++++++++++- include/uapi/linux/pcitest.h | 1 + 2 files changed, 203 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 1c0fd185114f..74ab5b5b9011 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -39,6 +39,8 @@ #define COMMAND_COPY BIT(5) #define COMMAND_ENABLE_DOORBELL BIT(6) #define COMMAND_DISABLE_DOORBELL BIT(7) +#define COMMAND_BAR_SUBRANGE_SETUP BIT(8) +#define COMMAND_BAR_SUBRANGE_CLEAR BIT(9) #define PCI_ENDPOINT_TEST_STATUS 0x8 #define STATUS_READ_SUCCESS BIT(0) @@ -55,6 +57,10 @@ #define STATUS_DOORBELL_ENABLE_FAIL BIT(11) #define STATUS_DOORBELL_DISABLE_SUCCESS BIT(12) #define STATUS_DOORBELL_DISABLE_FAIL BIT(13) +#define STATUS_BAR_SUBRANGE_SETUP_SUCCESS BIT(14) +#define STATUS_BAR_SUBRANGE_SETUP_FAIL BIT(15) +#define STATUS_BAR_SUBRANGE_CLEAR_SUCCESS BIT(16) +#define STATUS_BAR_SUBRANGE_CLEAR_FAIL BIT(17) #define PCI_ENDPOINT_TEST_LOWER_SRC_ADDR 0x0c #define PCI_ENDPOINT_TEST_UPPER_SRC_ADDR 0x10 @@ -77,6 +83,7 @@ #define CAP_MSI BIT(1) #define CAP_MSIX BIT(2) #define CAP_INTX BIT(3) +#define CAP_SUBRANGE_MAPPING BIT(4) #define PCI_ENDPOINT_TEST_DB_BAR 0x34 #define PCI_ENDPOINT_TEST_DB_OFFSET 0x38 @@ -100,6 +107,8 @@ #define PCI_DEVICE_ID_ROCKCHIP_RK3588 0x3588 +#define PCI_ENDPOINT_TEST_BAR_SUBRANGE_NSUB 2 + static DEFINE_IDA(pci_endpoint_test_ida); #define to_endpoint_test(priv) container_of((priv), struct pci_endpoint_test, \ @@ -414,6 +423,193 @@ static int pci_endpoint_test_bars(struct pci_endpoint_test *test) return 0; } +static u8 pci_endpoint_test_subrange_sig_byte(enum pci_barno barno, + unsigned int subno) +{ + return 0x50 + (barno * 8) + subno; +} + +static u8 
pci_endpoint_test_subrange_test_byte(enum pci_barno barno, + unsigned int subno) +{ + return 0xa0 + (barno * 8) + subno; +} + +static int pci_endpoint_test_bar_subrange_cmd(struct pci_endpoint_test *test, + enum pci_barno barno, u32 command, + u32 ok_bit, u32 fail_bit) +{ + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + int irq_type = test->irq_type; + u32 status; + + if (irq_type < PCITEST_IRQ_TYPE_INTX || + irq_type > PCITEST_IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type\n"); + return -EINVAL; + } + + reinit_completion(&test->irq_raised); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_STATUS, 0); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); + /* Reuse SIZE as a command parameter: bar number. */ + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, barno); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, command); + + if (!wait_for_completion_timeout(&test->irq_raised, + msecs_to_jiffies(1000))) + return -ETIMEDOUT; + + status = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_STATUS); + if (status & fail_bit) + return -EIO; + + if (!(status & ok_bit)) + return -EIO; + + return 0; +} + +static int pci_endpoint_test_bar_subrange_setup(struct pci_endpoint_test *test, + enum pci_barno barno) +{ + return pci_endpoint_test_bar_subrange_cmd(test, barno, + COMMAND_BAR_SUBRANGE_SETUP, + STATUS_BAR_SUBRANGE_SETUP_SUCCESS, + STATUS_BAR_SUBRANGE_SETUP_FAIL); +} + +static int pci_endpoint_test_bar_subrange_clear(struct pci_endpoint_test *test, + enum pci_barno barno) +{ + return pci_endpoint_test_bar_subrange_cmd(test, barno, + COMMAND_BAR_SUBRANGE_CLEAR, + STATUS_BAR_SUBRANGE_CLEAR_SUCCESS, + STATUS_BAR_SUBRANGE_CLEAR_FAIL); +} + +static int pci_endpoint_test_bar_subrange(struct pci_endpoint_test *test, + enum pci_barno barno) +{ + u32 nsub = PCI_ENDPOINT_TEST_BAR_SUBRANGE_NSUB; + struct device *dev = &test->pdev->dev; + size_t 
sub_size, buf_size; + resource_size_t bar_size; + void __iomem *bar_addr; + void *read_buf = NULL; + int ret, clear_ret; + size_t off, chunk; + u32 i, exp, val; + u8 pattern; + + if (!(test->ep_caps & CAP_SUBRANGE_MAPPING)) + return -EOPNOTSUPP; + + /* + * The test register BAR is not safe to reprogram and write/read + * over its full size. BAR_TEST already special-cases it to a tiny + * range. For subrange mapping tests, let's simply skip it. + */ + if (barno == test->test_reg_bar) + return -EBUSY; + + bar_size = pci_resource_len(test->pdev, barno); + if (!bar_size) + return -ENODATA; + + bar_addr = test->bar[barno]; + if (!bar_addr) + return -ENOMEM; + + ret = pci_endpoint_test_bar_subrange_setup(test, barno); + if (ret) + return ret; + + if (bar_size % nsub || bar_size / nsub > SIZE_MAX) { + ret = -EINVAL; + goto out_clear; + } + + sub_size = bar_size / nsub; + if (sub_size < sizeof(u32)) { + ret = -ENOSPC; + goto out_clear; + } + + /* Limit the temporary buffer size */ + buf_size = min_t(size_t, sub_size, SZ_1M); + + read_buf = kmalloc(buf_size, GFP_KERNEL); + if (!read_buf) { + ret = -ENOMEM; + goto out_clear; + } + + /* + * Step 1: verify EP-provided signature per subrange. This detects + * whether the EP actually applied the submap order. + */ + for (i = 0; i < nsub; i++) { + exp = (u32)pci_endpoint_test_subrange_sig_byte(barno, i) * + 0x01010101U; + val = ioread32(bar_addr + (i * sub_size)); + if (val != exp) { + dev_err(dev, + "BAR%d subrange%u signature mismatch @%#zx: exp %#08x got %#08x\n", + barno, i, (size_t)i * sub_size, exp, val); + ret = -EIO; + goto out_clear; + } + val = ioread32(bar_addr + (i * sub_size) + sub_size - sizeof(u32)); + if (val != exp) { + dev_err(dev, + "BAR%d subrange%u signature mismatch @%#zx: exp %#08x got %#08x\n", + barno, i, + ((size_t)i * sub_size) + sub_size - sizeof(u32), + exp, val); + ret = -EIO; + goto out_clear; + } + } + + /* Step 2: write unique pattern per subrange (write all first). 
*/ + for (i = 0; i < nsub; i++) { + pattern = pci_endpoint_test_subrange_test_byte(barno, i); + memset_io(bar_addr + (i * sub_size), pattern, sub_size); + } + + /* Step 3: read back and verify (read all after all writes). */ + for (i = 0; i < nsub; i++) { + pattern = pci_endpoint_test_subrange_test_byte(barno, i); + for (off = 0; off < sub_size; off += chunk) { + void *bad; + + chunk = min_t(size_t, buf_size, sub_size - off); + memcpy_fromio(read_buf, bar_addr + (i * sub_size) + off, + chunk); + bad = memchr_inv(read_buf, pattern, chunk); + if (bad) { + size_t bad_off = (u8 *)bad - (u8 *)read_buf; + + dev_err(dev, + "BAR%d subrange%u data mismatch @%#zx (pattern %#02x)\n", + barno, i, (size_t)i * sub_size + off + bad_off, + pattern); + ret = -EIO; + goto out_clear; + } + } + } + +out_clear: + kfree(read_buf); + clear_ret = pci_endpoint_test_bar_subrange_clear(test, barno); + return ret ?: clear_ret; +} + static int pci_endpoint_test_intx_irq(struct pci_endpoint_test *test) { u32 val; @@ -936,12 +1132,17 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case PCITEST_BAR: + case PCITEST_BAR_SUBRANGE: bar = arg; if (bar <= NO_BAR || bar > BAR_5) goto ret; if (is_am654_pci_dev(pdev) && bar == BAR_0) goto ret; - ret = pci_endpoint_test_bar(test, bar); + + if (cmd == PCITEST_BAR) + ret = pci_endpoint_test_bar(test, bar); + else + ret = pci_endpoint_test_bar_subrange(test, bar); break; case PCITEST_BARS: ret = pci_endpoint_test_bars(test); diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index d6023a45a9d0..710f8842223f 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -22,6 +22,7 @@ #define PCITEST_GET_IRQTYPE _IO('P', 0x9) #define PCITEST_BARS _IO('P', 0xa) #define PCITEST_DOORBELL _IO('P', 0xb) +#define PCITEST_BAR_SUBRANGE _IO('P', 0xc) #define PCITEST_CLEAR_IRQ _IO('P', 0x10) #define PCITEST_IRQ_TYPE_UNDEFINED -1 -- cgit v1.2.3