From bc62f5b308cbdedf29132fe96e9d591e526527e1 Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Thu, 20 Nov 2025 03:19:17 +0000
Subject: dax/hmem, e820, resource: Defer Soft Reserved insertion until hmem is ready

Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
instead of the iomem_resource tree at boot. Delay publishing these
ranges into the iomem hierarchy until ownership is resolved and the
HMEM path is ready to consume them.

Publishing Soft Reserved ranges into iomem too early conflicts with CXL
hotplug and prevents region assembly when those ranges overlap CXL
windows. Follow-up patches will reinsert Soft Reserved ranges into
iomem after CXL window publication is complete and HMEM is ready to
claim the memory. This provides a cleaner handoff between EFI-defined
memory ranges and CXL resource management without trimming or deleting
resources later.

In the meantime, "Soft Reserved" resources will no longer appear in
/proc/iomem, only their results. I.e., with "memmap=4G%4G+0xefffffff":

Before:

    100000000-1ffffffff : Soft Reserved
      100000000-1ffffffff : dax1.0
        100000000-1ffffffff : System RAM (kmem)

After:

    100000000-1ffffffff : dax1.0
      100000000-1ffffffff : System RAM (kmem)

The expectation is that this does not lead to a user-visible regression
because the dax1.0 device is created in both instances.

Co-developed-by: Smita Koralahalli
[Smita: incorporate feedback from x86 maintainer review]
Signed-off-by: Smita Koralahalli
Link: https://patch.msgid.link/20251120031925.87762-2-Smita.KoralahalliChannabasappa@amd.com
[djbw: cleanups and clarifications]
Link: https://lore.kernel.org/69443f707b025_1cee10022@dwillia2-mobl4.notmuch
Signed-off-by: Dan Williams
Signed-off-by: Dave Jiang
---
 include/linux/ioport.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 9afa30f9346f..95662b2fb458 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -232,6 +232,7 @@ struct resource_constraint {
 /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
 extern struct resource ioport_resource;
 extern struct resource iomem_resource;
+extern struct resource soft_reserve_resource;
 
 extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
 extern int request_resource(struct resource *root, struct resource *new);
@@ -418,6 +419,10 @@ walk_system_ram_res_rev(u64 start, u64 end, void *arg,
 extern int
 walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
 		    void *arg, int (*func)(struct resource *, void *));
+extern int walk_soft_reserve_res(u64 start, u64 end, void *arg,
+				 int (*func)(struct resource *, void *));
+extern int
+region_intersects_soft_reserve(resource_size_t start, size_t size);
 
 struct resource *devm_request_free_mem_region(struct device *dev,
 		struct resource *base, unsigned long size);
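To make the intended handoff concrete, the sketch below shows how a
consumer might use the two new interfaces once it is ready to take
ownership of the deferred ranges. This is a minimal illustration, not
code from this series: publish_soft_reserved() and hmem_publish_all()
are hypothetical names, and the republication details are assumptions
based on the commit message above. region_intersects_soft_reserve()
would similarly let a caller (e.g. CXL window publication) test a
candidate range for overlap before claiming it.

#include <linux/ioport.h>
#include <linux/slab.h>

/* Hypothetical callback: republish one deferred Soft Reserved range */
static int publish_soft_reserved(struct resource *res, void *unused)
{
        struct resource *new = kzalloc(sizeof(*new), GFP_KERNEL);

        if (!new)
                return -ENOMEM;

        /* Mirror the deferred entry from the soft_reserve_resource tree */
        new->name = res->name;
        new->start = res->start;
        new->end = res->end;
        new->flags = res->flags;
        new->desc = res->desc;

        /* Publish into the iomem hierarchy now that ownership is resolved */
        return insert_resource(&iomem_resource, new);
}

/* Hypothetical call site, once the HMEM path is ready to claim memory */
static int hmem_publish_all(void)
{
        /* Walk every deferred range across the physical address space */
        return walk_soft_reserve_res(0, (u64)-1, NULL, publish_soft_reserved);
}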
From 7c29ba02210c6e4570cdce53813a1ae68fb6d049 Mon Sep 17 00:00:00 2001
From: Terry Bowman
Date: Wed, 14 Jan 2026 12:20:24 -0600
Subject: PCI: Introduce pcie_is_cxl()

CXL is a protocol that runs on top of PCIe electricals. Its error model
also runs on top of the PCIe AER error model by standardizing
"internal" errors as "CXL" errors. Linux has historically ignored
internal errors. CXL protocol error handling is then a task of
enhancing the PCIe AER core to understand that PCIe ports (upstream and
downstream) and endpoints may throw internal errors that represent
standard CXL protocol errors.

The proposed method to make that determination is to teach 'struct
pci_dev' to cache when its link has trained the CXL.mem and/or
CXL.cache protocols, and then treat all internal errors as CXL errors.
A design goal is to not burden the PCIe AER core with CXL knowledge
beyond just enough to forward error notifications to the CXL RAS core.
The forwarded notification looks up a 'struct cxl_port' or 'struct
cxl_dport' companion device to the PCI device.

Introduce set_pcie_cxl() with logic checking for CXL.mem or CXL.cache
status in the CXL Flex Bus DVSEC status register. The CXL Flex Bus
DVSEC presence is used because it is required for all CXL PCIe
devices [1].

[1] CXL 3.1 Spec, 8.1.1 PCIe Designated Vendor-Specific Extended
    Capability (DVSEC) ID Assignment, Table 8-2

Signed-off-by: Terry Bowman
Reviewed-by: Ira Weiny
Reviewed-by: Kuppuswamy Sathyanarayanan
Reviewed-by: Dave Jiang
Reviewed-by: Jonathan Cameron
Reviewed-by: Alejandro Lucero
Reviewed-by: Ben Cheatham
Reviewed-by: Dan Williams
Acked-by: Bjorn Helgaas
Link: https://patch.msgid.link/20260114182055.46029-4-terry.bowman@amd.com
Signed-off-by: Dan Williams
Signed-off-by: Dave Jiang
---
 drivers/pci/probe.c           | 31 +++++++++++++++++++++++++++++++
 include/linux/pci.h           |  6 ++++++
 include/uapi/linux/pci_regs.h |  6 ++++++
 3 files changed, 43 insertions(+)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 41183aed8f5d..bd7ce41d0c7a 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1735,6 +1735,35 @@ static void set_pcie_thunderbolt(struct pci_dev *dev)
 		dev->is_thunderbolt = 1;
 }
 
+static void set_pcie_cxl(struct pci_dev *dev)
+{
+	struct pci_dev *bridge;
+	u16 dvsec, cap;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	/*
+	 * Update parent's CXL state because alternate protocol training
+	 * may have changed
+	 */
+	bridge = pci_upstream_bridge(dev);
+	if (bridge)
+		set_pcie_cxl(bridge);
+
+	dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL,
+					  PCI_DVSEC_CXL_FLEXBUS_PORT);
+	if (!dvsec)
+		return;
+
+	pci_read_config_word(dev, dvsec + PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS,
+			     &cap);
+
+	dev->is_cxl = FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_CACHE, cap) ||
+		      FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_MEM, cap);
+
+}
+
 static void set_pcie_untrusted(struct pci_dev *dev)
 {
 	struct pci_dev *parent = pci_upstream_bridge(dev);
@@ -2065,6 +2094,8 @@ int pci_setup_device(struct pci_dev *dev)
 	/* Need to have dev->cfg_size ready */
 	set_pcie_thunderbolt(dev);
 
+	set_pcie_cxl(dev);
+
 	set_pcie_untrusted(dev);
 
 	if (pci_is_pcie(dev))
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 864775651c6f..f8e8b3df794d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -463,6 +463,7 @@ struct pci_dev {
 	unsigned int	is_pciehp:1;
 	unsigned int	shpc_managed:1;		/* SHPC owned by shpchp */
 	unsigned int	is_thunderbolt:1;	/* Thunderbolt controller */
+	unsigned int	is_cxl:1;		/* Compute Express Link (CXL) */
 	/*
 	 * Devices marked being untrusted are the ones that can potentially
 	 * execute DMA attacks and similar. They are typically connected
@@ -791,6 +792,11 @@ static inline bool pci_is_display(struct pci_dev *pdev)
 	return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY;
 }
 
+static inline bool pcie_is_cxl(struct pci_dev *pci_dev)
+{
+	return pci_dev->is_cxl;
+}
+
 #define for_each_pci_bridge(dev, bus) \
 	list_for_each_entry(dev, &bus->devices, bus_list) \
 		if (!pci_is_bridge(dev)) {} else
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 662582bdccf0..b6622fd60fd9 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1379,6 +1379,12 @@
 /* CXL r4.0, 8.1.7: GPF DVSEC for CXL Device */
 #define PCI_DVSEC_CXL_DEVICE_GPF	5
 
+/* CXL r4.0, 8.1.8: Flex Bus DVSEC */
+#define PCI_DVSEC_CXL_FLEXBUS_PORT	7
+#define PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS	0xE
+#define PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_CACHE	_BITUL(0)
+#define PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_MEM	_BITUL(2)
+
 /* CXL r4.0, 8.1.9: Register Locator DVSEC */
 #define PCI_DVSEC_CXL_REG_LOCATOR	8
 #define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1	0xC
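As a usage sketch of the new helper, an error handler can gate CXL
handling on the cached link-training state. handle_internal_error() and
cxl_forward_error() below are hypothetical names standing in for the
forwarding path described in the commit message; they are not
interfaces added by this patch.

#include <linux/pci.h>

/* Hypothetical dispatch for an unmasked AER internal error */
static void handle_internal_error(struct pci_dev *pdev)
{
        /*
         * The link trained CXL.cache or CXL.mem, so treat the internal
         * error as a standard CXL protocol error and hand it off.
         */
        if (pcie_is_cxl(pdev)) {
                cxl_forward_error(pdev);        /* hypothetical hook */
                return;
        }

        /* Status quo: plain PCIe internal errors remain ignored */
}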
From 6dc5fe212e74e6880a1da0093f627387d0a658bb Mon Sep 17 00:00:00 2001
From: Terry Bowman
Date: Wed, 14 Jan 2026 12:20:30 -0600
Subject: PCI/AER: Export pci_aer_unmask_internal_errors()

Internal PCIe errors are not enabled by default during initialization
because their behavior is too device-specific and there is no standard
way to reason about them. However, for CXL an internal error is the
standard mechanism for conveying CXL protocol errors.

Export pci_aer_unmask_internal_errors() for CXL, and make it clear that
unmasking internal errors is only meant for CXL; the status quo of
leaving them masked for PCIe in general remains.

Signed-off-by: Terry Bowman
Reviewed-by: Dave Jiang
Reviewed-by: Kuppuswamy Sathyanarayanan
Reviewed-by: Jonathan Cameron
Link: https://patch.msgid.link/20260114182055.46029-10-terry.bowman@amd.com
Co-developed-by: Dan Williams
Signed-off-by: Dan Williams
Acked-by: Bjorn Helgaas
Signed-off-by: Dave Jiang
---
 drivers/pci/pcie/aer.c | 11 ++++++++---
 include/linux/aer.h    |  2 ++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index c99ba2a1159c..972ecaf6a832 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1120,8 +1120,6 @@ static bool find_source_device(struct pci_dev *parent,
 	return true;
 }
 
-#ifdef CONFIG_PCIEAER_CXL
-
 /**
  * pci_aer_unmask_internal_errors - unmask internal errors
  * @dev: pointer to the pci_dev data structure
@@ -1132,7 +1130,7 @@ static bool find_source_device(struct pci_dev *parent,
  * Note: AER must be enabled and supported by the device which must be
  * checked in advance, e.g. with pcie_aer_is_native().
  */
-static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
+void pci_aer_unmask_internal_errors(struct pci_dev *dev)
 {
 	int aer = dev->aer_cap;
 	u32 mask;
@@ -1146,6 +1144,13 @@ static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
 	pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
 }
 
+/*
+ * Internal errors are too device-specific to enable generally, however for CXL
+ * their behavior is standardized for conveying CXL protocol errors.
+ */
+EXPORT_SYMBOL_FOR_MODULES(pci_aer_unmask_internal_errors, "cxl_core");
+
+#ifdef CONFIG_PCIEAER_CXL
 static bool is_cxl_mem_dev(struct pci_dev *dev)
 {
 	/*
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 02940be66324..df0f5c382286 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -56,12 +56,14 @@ struct aer_capability_regs {
 #if defined(CONFIG_PCIEAER)
 int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
 int pcie_aer_is_native(struct pci_dev *dev);
+void pci_aer_unmask_internal_errors(struct pci_dev *dev);
 #else
 static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
 {
 	return -EINVAL;
 }
 static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; }
+static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { }
 #endif
 
 void pci_print_aer(struct pci_dev *dev, int aer_severity,
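For completeness, here is a sketch of the expected caller side in
cxl_core, following the kernel-doc note above that native AER support
must be checked in advance. Only the two function calls are from this
series; the enclosing helper and its name are assumptions.

#include <linux/aer.h>
#include <linux/errno.h>
#include <linux/pci.h>

/* Hypothetical cxl_core helper: enable CXL protocol error reporting */
static int cxl_unmask_internal_errors(struct pci_dev *pdev)
{
        /* AER must be enabled and natively owned for this device */
        if (!pcie_aer_is_native(pdev))
                return -ENXIO;

        pci_aer_unmask_internal_errors(pdev);
        return 0;
}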