diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-08-01 15:47:06 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-08-01 15:47:06 -0700
commit | d41e5839d80043beaa63973eab602579ebdb238f (patch)
tree | a5e1b52c432af6a179af92bf036c5a5367054b2b
parent | 111857421c93fc88924106436741bd2f5b8bc220 (diff)
parent | f11a5f89910a7ae970fbce4fdc02d86a8ba8570f (diff)
Merge tag 'cxl-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull CXL updates from Dave Jiang:
"The most significant changes in this pull request is the series that
introduces ACQUIRE() and ACQUIRE_ERR() macros to replace conditional
locking and ease the pain points of scoped_cond_guard().
The series also includes follow on changes that refactor the CXL
sub-system to utilize the new macros.
Detail summary:
- Add documentation template for CXL conventions to document CXL
platform quirks
- Replace mutex_lock_io() with mutex_lock() for mailbox
- Add a location limit for the fake CFMWS range in cxl_test, enabling
ARM platforms
- CXL documentation typo and clarity fixes
- Use the correct format specifier in cxl_set_ecs_threshold()
- Make cxl_bus_type constant
- Introduce new helper cxl_resource_contains_addr() to check address
availability (see the sketch after the diffstat below)
- Fix wrong DPA checking for PPR operation
- Remove core/acpi.c and CXL core dependency on ACPI
- Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks
- Add CXL updates utilizing the ACQUIRE() macro to remove gotos and
improve readability
- Add a missing return value to the dummy version of
cxl_decoder_detach() built without CONFIG_CXL_REGION
- CXL events updates for spec r3.2
- Fix the return value in the __cxl_decoder_detach() error path
- CXL debugfs documentation fix"
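The core pattern: ACQUIRE() declares a scope-bound lock variable that is
released automatically when the variable goes out of scope, and
ACQUIRE_ERR() reports whether the conditional acquisition actually
succeeded, so interruptible locking no longer needs goto-based unwind.
A minimal before/after sketch, condensed from the memdev.c conversion in
the diff below; cxl_op_before()/cxl_op_after() and do_work() are
hypothetical names for illustration only:

```c
/* Before: conditional locking with explicit unwind labels */
static int cxl_op_before(struct cxl_memdev *cxlmd, u64 dpa)
{
	int rc;

	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		goto out_region;

	rc = do_work(cxlmd, dpa);	/* hypothetical locked section */

	up_read(&cxl_dpa_rwsem);
out_region:
	up_read(&cxl_region_rwsem);
	return rc;
}

/* After: scope-bound locks, straight-line error returns */
static int cxl_op_after(struct cxl_memdev *cxlmd, u64 dpa)
{
	int rc;

	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
		return rc;

	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
		return rc;

	return do_work(cxlmd, dpa);	/* both locks drop on return */
}
```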
* tag 'cxl-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (28 commits)
Documentation/ABI/testing/debugfs-cxl: Add 'cxl' to clear_poison path
cxl/region: Fix an ERR_PTR() vs NULL bug
cxl/events: Trace Memory Sparing Event Record
cxl/events: Add extra validity checks for CVME count in DRAM Event Record
cxl/events: Add extra validity checks for corrected memory error count in General Media Event Record
cxl/events: Update Common Event Record to CXL spec rev 3.2
cxl: Fix -Werror=return-type in cxl_decoder_detach()
cleanup: Fix documentation build error for ACQUIRE updates
cxl: Convert to ACQUIRE() for conditional rwsem locking
cxl/region: Consolidate cxl_decoder_kill_region() and cxl_region_detach()
cxl/region: Move ready-to-probe state check to a helper
cxl/region: Split commit_store() into __commit() and queue_reset() helpers
cxl/decoder: Drop pointless locking
cxl/decoder: Move decoder register programming to a helper
cxl/mbox: Convert poison list mutex to ACQUIRE()
cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks
cxl: Remove core/acpi.c and cxl core dependency on ACPI
cxl/core: Using cxl_resource_contains_addr() to check address availability
cxl/edac: Fix wrong dpa checking for PPR operation
cxl/core: Introduce a new helper cxl_resource_contains_addr()
...
28 files changed, 827 insertions, 453 deletions
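Several hunks below replace open-coded interval checks of the form
`dpa < res.start || dpa > res.end` with the new helper. For reference,
the helper as added in the hdm.c hunk wraps the address in a one-byte
resource so the existing resource_contains() API performs the bounds
check:

```c
bool cxl_resource_contains_addr(const struct resource *res, const resource_size_t addr)
{
	struct resource _addr = DEFINE_RES_MEM(addr, 1);

	return resource_contains(res, &_addr);
}
```

Callers then read, for example (from the memdev.c hunk):

```c
	if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa)) {
		dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
			dpa, &cxlds->dpa_res);
		return -EINVAL;
	}
```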
diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl index 12488c14be64..e95e21f131e9 100644 --- a/Documentation/ABI/testing/debugfs-cxl +++ b/Documentation/ABI/testing/debugfs-cxl @@ -20,7 +20,7 @@ Description: visible for devices supporting the capability. -What: /sys/kernel/debug/memX/clear_poison +What: /sys/kernel/debug/cxl/memX/clear_poison Date: April, 2023 KernelVersion: v6.4 Contact: linux-cxl@vger.kernel.org diff --git a/Documentation/driver-api/cxl/conventions.rst b/Documentation/driver-api/cxl/conventions.rst new file mode 100644 index 000000000000..da347a81a237 --- /dev/null +++ b/Documentation/driver-api/cxl/conventions.rst @@ -0,0 +1,47 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + +======================================= +Compute Express Link: Linux Conventions +======================================= + +There exists shipping platforms that bend or break CXL specification +expectations. Record the details and the rationale for those deviations. +Borrow the ACPI Code First template format to capture the assumptions +and tradeoffs such that multiple platform implementations can follow the +same convention. + +<(template) Title> +================== + +Document +-------- +CXL Revision <rev>, Version <ver> + +License +------- +SPDX-License Identifier: CC-BY-4.0 + +Creator/Contributors +-------------------- + +Summary of the Change +--------------------- + +<Detail the conflict with the specification and where available the +assumptions and tradeoffs taken by the hardware platform.> + + +Benefits of the Change +---------------------- + +<Detail what happens if platforms and Linux do not adopt this +convention.> + +References +---------- + +Detailed Description of the Change +---------------------------------- + +<Propose spec language that corrects the conflict.> diff --git a/Documentation/driver-api/cxl/devices/device-types.rst b/Documentation/driver-api/cxl/devices/device-types.rst index f5e4330c1cfe..923f5d89bc04 100644 --- a/Documentation/driver-api/cxl/devices/device-types.rst +++ b/Documentation/driver-api/cxl/devices/device-types.rst @@ -63,13 +63,13 @@ A Type-2 CXL Device: * Supports cxl.io, cxl.cache, and cxl.mem protocols * Optionally implements coherent cache and Host-Managed Device Memory -* Is typically an accelerator device w/ high bandwidth memory. +* Is typically an accelerator device with high bandwidth memory. The primary difference between a type-1 and type-2 device is the presence of host-managed device memory, which allows the device to operate on a -local memory bank - while the CPU sill has coherent DMA to the same memory. +local memory bank - while the CPU still has coherent DMA to the same memory. -The allows things like GPUs to expose their memory via DAX devices or file +This allows things like GPUs to expose their memory via DAX devices or file descriptors, allows drivers and programs direct access to device memory rather than use block-transfer semantics. @@ -89,7 +89,7 @@ basic coherent DMA. Switch ------ -A CXL switch is a device capacity of routing any CXL (and by extension, PCIe) +A CXL switch is a device capable of routing any CXL (and by extension, PCIe) protocol between an upstream, downstream, or peer devices. Many devices, such as Multi-Logical Devices, imply the presence of switching in some manner. @@ -103,7 +103,7 @@ A Single-Logical Device (SLD) is a device which presents a single device to one or more heads. 
A Multi-Logical Device (MLD) is a device which may present multiple devices -to one or more devices. +to one or more upstream devices. A Single-Headed Device exposes only a single physical connection. diff --git a/Documentation/driver-api/cxl/index.rst b/Documentation/driver-api/cxl/index.rst index 9e1414ad3357..c1106a68b67c 100644 --- a/Documentation/driver-api/cxl/index.rst +++ b/Documentation/driver-api/cxl/index.rst @@ -14,6 +14,7 @@ that have impacts on each other. The docs here break up configurations steps. theory-of-operation maturity-map + conventions .. toctree:: :maxdepth: 2 diff --git a/Documentation/driver-api/cxl/linux/cxl-driver.rst b/Documentation/driver-api/cxl/linux/cxl-driver.rst index 9759e90c3cf1..dd6dd17dc536 100644 --- a/Documentation/driver-api/cxl/linux/cxl-driver.rst +++ b/Documentation/driver-api/cxl/linux/cxl-driver.rst @@ -20,7 +20,7 @@ The CXL driver is split into a number of drivers. * cxl_port - initializes root and provides port enumeration interface. * cxl_acpi - initializes root decoders and interacts with ACPI data. * cxl_p/mem - initializes memory devices -* cxl_pci - uses cxl_port to enumates the actual fabric hierarchy. +* cxl_pci - uses cxl_port to enumerate the actual fabric hierarchy. Driver Devices ============== diff --git a/Documentation/driver-api/cxl/theory-of-operation.rst b/Documentation/driver-api/cxl/theory-of-operation.rst index 40793dad3630..257f513e320c 100644 --- a/Documentation/driver-api/cxl/theory-of-operation.rst +++ b/Documentation/driver-api/cxl/theory-of-operation.rst @@ -29,8 +29,8 @@ Platform firmware enumerates a menu of interleave options at the "CXL root port" (Linux term for the top of the CXL decode topology). From there, PCIe topology dictates which endpoints can participate in which Host Bridge decode regimes. Each PCIe Switch in the path between the root and an endpoint introduces a point -at which the interleave can be split. For example platform firmware may say at a -given range only decodes to 1 one Host Bridge, but that Host Bridge may in turn +at which the interleave can be split. For example, platform firmware may say a +given range only decodes to one Host Bridge, but that Host Bridge may in turn interleave cycles across multiple Root Ports. An intervening Switch between a port and an endpoint may interleave cycles across multiple Downstream Switch Ports, etc. @@ -187,7 +187,7 @@ decodes them to "ports", "ports" decode to "endpoints", and "endpoints" represent the decode from SPA (System Physical Address) to DPA (Device Physical Address). -Continuing the RAID analogy, disks have both topology metadata and on device +Continuing the RAID analogy, disks have both topology metadata and on-device metadata that determine RAID set assembly. CXL Port topology and CXL Port link status is metadata for CXL.mem set assembly. The CXL Port topology is enumerated by the arrival of a CXL.mem device. I.e. unless and until the PCIe core attaches @@ -197,7 +197,7 @@ the Linux PCI core to tear down switch-level CXL resources because the endpoint ->remove() event cleans up the port data that was established to support that Memory Expander. 
-The port metadata and potential decode schemes that a give memory device may +The port metadata and potential decode schemes that a given memory device may participate can be determined via a command like:: # cxl list -BDMu -d root -m mem3 @@ -249,8 +249,8 @@ participate can be determined via a command like:: ...which queries the CXL topology to ask "given CXL Memory Expander with a kernel device name of 'mem3' which platform level decode ranges may this device participate". A given expander can participate in multiple CXL.mem interleave -sets simultaneously depending on how many decoder resource it has. In this -example mem3 can participate in one or more of a PMEM interleave that spans to +sets simultaneously depending on how many decoder resources it has. In this +example mem3 can participate in one or more of a PMEM interleave that spans two Host Bridges, a PMEM interleave that targets a single Host Bridge, a Volatile memory interleave that spans 2 Host Bridges, and a Volatile memory interleave that only targets a single Host Bridge. diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index a1a99ec3f12c..712624cba2b6 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -335,6 +335,63 @@ static int add_or_reset_cxl_resource(struct resource *parent, struct resource *r return rc; } +static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) +{ + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct range *hpa = &cxld->hpa_range; + resource_size_t size = range_len(hpa); + resource_size_t start = hpa->start; + resource_size_t cache_size; + struct resource res; + int nid, rc; + + res = DEFINE_RES(start, size, 0); + nid = phys_to_target_node(start); + + rc = hmat_get_extended_linear_cache_size(&res, nid, &cache_size); + if (rc) + return rc; + + /* + * The cache range is expected to be within the CFMWS. + * Currently there is only support cache_size == cxl_size. CXL + * size is then half of the total CFMWS window size. + */ + size = size >> 1; + if (cache_size && size != cache_size) { + dev_warn(&cxld->dev, + "Extended Linear Cache size %pa != CXL size %pa. No Support!", + &cache_size, &size); + return -ENXIO; + } + + cxlrd->cache_size = cache_size; + + return 0; +} + +static void cxl_setup_extended_linear_cache(struct cxl_root_decoder *cxlrd) +{ + int rc; + + rc = cxl_acpi_set_cache_size(cxlrd); + if (!rc) + return; + + if (rc != -EOPNOTSUPP) { + /* + * Failing to support extended linear cache region resize does not + * prevent the region from functioning. Only causes cxl list showing + * incorrect region size. 
+ */ + dev_warn(cxlrd->cxlsd.cxld.dev.parent, + "Extended linear cache calculation failed rc:%d\n", rc); + } + + /* Ignoring return code */ + cxlrd->cache_size = 0; +} + DEFINE_FREE(put_cxlrd, struct cxl_root_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev)) DEFINE_FREE(del_cxl_resource, struct resource *, if (_T) del_cxl_resource(_T)) @@ -394,6 +451,8 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, ig = CXL_DECODER_MIN_GRANULARITY; cxld->interleave_granularity = ig; + cxl_setup_extended_linear_cache(cxlrd); + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { if (ways != 1 && ways != 3) { cxims_ctx = (struct cxl_cxims_context) { diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 79e2ef81fde8..5ad8fef210b5 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -15,7 +15,6 @@ cxl_core-y += hdm.o cxl_core-y += pmu.o cxl_core-y += cdat.o cxl_core-y += ras.o -cxl_core-y += acpi.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o cxl_core-$(CONFIG_CXL_MCE) += mce.o diff --git a/drivers/cxl/core/acpi.c b/drivers/cxl/core/acpi.c deleted file mode 100644 index f13b4dae6ac5..000000000000 --- a/drivers/cxl/core/acpi.c +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2024 Intel Corporation. All rights reserved. */ -#include <linux/acpi.h> -#include "cxl.h" -#include "core.h" - -int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, - int nid, resource_size_t *size) -{ - return hmat_get_extended_linear_cache_size(backing_res, nid, size); -} diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 0ccef2f2a26a..c0af645425f4 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -336,7 +336,7 @@ static int match_cxlrd_hb(struct device *dev, void *data) cxlrd = to_cxl_root_decoder(dev); cxlsd = &cxlrd->cxlsd; - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); for (int i = 0; i < cxlsd->nr_targets; i++) { if (host_bridge == cxlsd->target[i]->dport_dev) return 1; @@ -987,7 +987,7 @@ void cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr) bool is_root; int rc; - lockdep_assert_held(&cxl_dpa_rwsem); + lockdep_assert_held(&cxl_rwsem.dpa); struct xarray *usp_xa __free(free_perf_xa) = kzalloc(sizeof(*usp_xa), GFP_KERNEL); @@ -1057,7 +1057,7 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr, { struct cxl_dpa_perf *perf; - lockdep_assert_held(&cxl_dpa_rwsem); + lockdep_assert_held(&cxl_rwsem.dpa); perf = cxled_get_dpa_perf(cxled); if (IS_ERR(perf)) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 29b61828a847..2669f251d677 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -5,6 +5,7 @@ #define __CXL_CORE_H__ #include <cxl/mailbox.h> +#include <linux/rwsem.h> extern const struct device_type cxl_nvdimm_bridge_type; extern const struct device_type cxl_nvdimm_type; @@ -12,6 +13,11 @@ extern const struct device_type cxl_pmu_type; extern struct attribute_group cxl_base_attribute_group; +enum cxl_detach_mode { + DETACH_ONLY, + DETACH_INVALIDATE, +}; + #ifdef CONFIG_CXL_REGION extern struct device_attribute dev_attr_create_pmem_region; extern struct device_attribute dev_attr_create_ram_region; @@ -20,7 +26,11 @@ extern struct device_attribute dev_attr_region; extern const struct device_type cxl_pmem_region_type; extern const struct device_type cxl_dax_region_type; extern const struct device_type cxl_region_type; 
-void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled); + +int cxl_decoder_detach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + enum cxl_detach_mode mode); + #define CXL_REGION_ATTR(x) (&dev_attr_##x.attr) #define CXL_REGION_TYPE(x) (&cxl_region_type) #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr), @@ -48,8 +58,11 @@ static inline int cxl_get_poison_by_endpoint(struct cxl_port *port) { return 0; } -static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) +static inline int cxl_decoder_detach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, + int pos, enum cxl_detach_mode mode) { + return 0; } static inline int cxl_region_init(void) { @@ -80,6 +93,7 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size); int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled); +bool cxl_resource_contains_addr(const struct resource *res, const resource_size_t addr); enum cxl_rcrb { CXL_RCRB_DOWNSTREAM, @@ -96,8 +110,20 @@ u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb); #define PCI_RCRB_CAP_HDR_NEXT_MASK GENMASK(15, 8) #define PCI_CAP_EXP_SIZEOF 0x3c -extern struct rw_semaphore cxl_dpa_rwsem; -extern struct rw_semaphore cxl_region_rwsem; +struct cxl_rwsem { + /* + * All changes to HPA (interleave configuration) occur with this + * lock held for write. + */ + struct rw_semaphore region; + /* + * All changes to a device DPA space occur with this lock held + * for write. + */ + struct rw_semaphore dpa; +}; + +extern struct cxl_rwsem cxl_rwsem; int cxl_memdev_init(void); void cxl_memdev_exit(void); @@ -120,8 +146,6 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, int cxl_ras_init(void); void cxl_ras_exit(void); int cxl_gpf_port_setup(struct cxl_dport *dport); -int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, - int nid, resource_size_t *size); #ifdef CONFIG_CXL_FEATURES struct cxl_feat_entry * diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c index 623aaa4439c4..79994ca9bc9f 100644 --- a/drivers/cxl/core/edac.c +++ b/drivers/cxl/core/edac.c @@ -115,10 +115,9 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx, flags, min_cycle); } - struct rw_semaphore *region_lock __free(rwsem_read_release) = - rwsem_read_intr_acquire(&cxl_region_rwsem); - if (!region_lock) - return -EINTR; + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((ret = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) + return ret; cxlr = cxl_ps_ctx->cxlr; p = &cxlr->params; @@ -158,10 +157,9 @@ static int cxl_scrub_set_attrbs_region(struct device *dev, struct cxl_region *cxlr; int ret, i; - struct rw_semaphore *region_lock __free(rwsem_read_release) = - rwsem_read_intr_acquire(&cxl_region_rwsem); - if (!region_lock) - return -EINTR; + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((ret = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) + return ret; cxlr = cxl_ps_ctx->cxlr; p = &cxlr->params; @@ -697,7 +695,7 @@ static int cxl_set_ecs_threshold(struct device *dev, u8 *log_cap, u16 *config, ECS_THRESHOLD_IDX_4096); break; default: - dev_dbg(dev, "Invalid CXL ECS threshold count(%d) to set\n", + dev_dbg(dev, "Invalid CXL ECS threshold count(%u) to set\n", val); dev_dbg(dev, "Supported ECS threshold counts: %u, %u, %u\n", ECS_THRESHOLD_256, ECS_THRESHOLD_1024, @@ -1340,16 +1338,15 @@ cxl_mem_perform_sparing(struct device *dev, 
struct cxl_memdev_sparing_in_payload sparing_pi; struct cxl_event_dram *rec = NULL; u16 validity_flags = 0; + int ret; - struct rw_semaphore *region_lock __free(rwsem_read_release) = - rwsem_read_intr_acquire(&cxl_region_rwsem); - if (!region_lock) - return -EINTR; + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((ret = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return ret; - struct rw_semaphore *dpa_lock __free(rwsem_read_release) = - rwsem_read_intr_acquire(&cxl_dpa_rwsem); - if (!dpa_lock) - return -EINTR; + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((ret = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return ret; if (!cxl_sparing_ctx->cap_safe_when_in_use) { /* Memory to repair must be offline */ @@ -1523,7 +1520,7 @@ static int cxl_mem_sparing_set_dpa(struct device *dev, void *drv_data, u64 dpa) struct cxl_memdev *cxlmd = ctx->cxlmd; struct cxl_dev_state *cxlds = cxlmd->cxlds; - if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) + if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa)) return -EINVAL; ctx->dpa = dpa; @@ -1787,16 +1784,15 @@ static int cxl_mem_perform_ppr(struct cxl_ppr_context *cxl_ppr_ctx) struct cxl_memdev_ppr_maintenance_attrbs maintenance_attrbs; struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd; struct cxl_mem_repair_attrbs attrbs = { 0 }; + int ret; - struct rw_semaphore *region_lock __free(rwsem_read_release) = - rwsem_read_intr_acquire(&cxl_region_rwsem); - if (!region_lock) - return -EINTR; + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((ret = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return ret; - struct rw_semaphore *dpa_lock __free(rwsem_read_release) = - rwsem_read_intr_acquire(&cxl_dpa_rwsem); - if (!dpa_lock) - return -EINTR; + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((ret = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return ret; if (!cxl_ppr_ctx->media_accessible || !cxl_ppr_ctx->data_retained) { /* Memory to repair must be offline */ @@ -1892,7 +1888,7 @@ static int cxl_ppr_set_dpa(struct device *dev, void *drv_data, u64 dpa) struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd; struct cxl_dev_state *cxlds = cxlmd->cxlds; - if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) + if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa)) return -EINVAL; cxl_ppr_ctx->dpa = dpa; @@ -1923,8 +1919,11 @@ static int cxl_ppr_set_nibble_mask(struct device *dev, void *drv_data, static int cxl_do_ppr(struct device *dev, void *drv_data, u32 val) { struct cxl_ppr_context *cxl_ppr_ctx = drv_data; + struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd; + struct cxl_dev_state *cxlds = cxlmd->cxlds; - if (!cxl_ppr_ctx->dpa || val != EDAC_DO_MEM_REPAIR) + if (val != EDAC_DO_MEM_REPAIR || + !cxl_resource_contains_addr(&cxlds->dpa_res, cxl_ppr_ctx->dpa)) return -EINVAL; return cxl_mem_perform_ppr(cxl_ppr_ctx); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index ab1007495f6b..e9e1d555cec6 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -16,7 +16,10 @@ * for enumerating these registers and capabilities. 
*/ -DECLARE_RWSEM(cxl_dpa_rwsem); +struct cxl_rwsem cxl_rwsem = { + .region = __RWSEM_INITIALIZER(cxl_rwsem.region), + .dpa = __RWSEM_INITIALIZER(cxl_rwsem.dpa), +}; static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, int *target_map) @@ -214,7 +217,7 @@ void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds) { struct resource *p1, *p2; - guard(rwsem_read)(&cxl_dpa_rwsem); + guard(rwsem_read)(&cxl_rwsem.dpa); for (p1 = cxlds->dpa_res.child; p1; p1 = p1->sibling) { __cxl_dpa_debug(file, p1, 0); for (p2 = p1->child; p2; p2 = p2->sibling) @@ -266,7 +269,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled) struct resource *res = cxled->dpa_res; resource_size_t skip_start; - lockdep_assert_held_write(&cxl_dpa_rwsem); + lockdep_assert_held_write(&cxl_rwsem.dpa); /* save @skip_start, before @res is released */ skip_start = res->start - cxled->skip; @@ -281,7 +284,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled) static void cxl_dpa_release(void *cxled) { - guard(rwsem_write)(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_rwsem.dpa); __cxl_dpa_release(cxled); } @@ -293,7 +296,7 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled) { struct cxl_port *port = cxled_to_port(cxled); - lockdep_assert_held_write(&cxl_dpa_rwsem); + lockdep_assert_held_write(&cxl_rwsem.dpa); devm_remove_action(&port->dev, cxl_dpa_release, cxled); __cxl_dpa_release(cxled); } @@ -361,7 +364,7 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, struct resource *res; int rc; - lockdep_assert_held_write(&cxl_dpa_rwsem); + lockdep_assert_held_write(&cxl_rwsem.dpa); if (!len) { dev_warn(dev, "decoder%d.%d: empty reservation attempted\n", @@ -470,7 +473,7 @@ int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info) { struct device *dev = cxlds->dev; - guard(rwsem_write)(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_rwsem.dpa); if (cxlds->nr_partitions) return -EBUSY; @@ -516,9 +519,8 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, struct cxl_port *port = cxled_to_port(cxled); int rc; - down_write(&cxl_dpa_rwsem); - rc = __cxl_dpa_reserve(cxled, base, len, skipped); - up_write(&cxl_dpa_rwsem); + scoped_guard(rwsem_write, &cxl_rwsem.dpa) + rc = __cxl_dpa_reserve(cxled, base, len, skipped); if (rc) return rc; @@ -529,7 +531,7 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, "CXL"); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled) { - guard(rwsem_read)(&cxl_dpa_rwsem); + guard(rwsem_read)(&cxl_rwsem.dpa); if (cxled->dpa_res) return resource_size(cxled->dpa_res); @@ -540,19 +542,26 @@ resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled) { resource_size_t base = -1; - lockdep_assert_held(&cxl_dpa_rwsem); + lockdep_assert_held(&cxl_rwsem.dpa); if (cxled->dpa_res) base = cxled->dpa_res->start; return base; } +bool cxl_resource_contains_addr(const struct resource *res, const resource_size_t addr) +{ + struct resource _addr = DEFINE_RES_MEM(addr, 1); + + return resource_contains(res, &_addr); +} + int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) { struct cxl_port *port = cxled_to_port(cxled); struct device *dev = &cxled->cxld.dev; - guard(rwsem_write)(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_rwsem.dpa); if (!cxled->dpa_res) return 0; if (cxled->cxld.region) { @@ -582,7 +591,7 @@ int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, struct device *dev = &cxled->cxld.dev; int part; - guard(rwsem_write)(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_rwsem.dpa); if 
(cxled->cxld.flags & CXL_DECODER_F_ENABLE) return -EBUSY; @@ -614,7 +623,7 @@ static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size) struct resource *p, *last; int part; - guard(rwsem_write)(&cxl_dpa_rwsem); + guard(rwsem_write)(&cxl_rwsem.dpa); if (cxled->cxld.region) { dev_dbg(dev, "decoder attached to %s\n", dev_name(&cxled->cxld.region->dev)); @@ -764,46 +773,12 @@ static int cxld_await_commit(void __iomem *hdm, int id) return -ETIMEDOUT; } -static int cxl_decoder_commit(struct cxl_decoder *cxld) +static void setup_hw_decoder(struct cxl_decoder *cxld, void __iomem *hdm) { - struct cxl_port *port = to_cxl_port(cxld->dev.parent); - struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); - void __iomem *hdm = cxlhdm->regs.hdm_decoder; - int id = cxld->id, rc; + int id = cxld->id; u64 base, size; u32 ctrl; - if (cxld->flags & CXL_DECODER_F_ENABLE) - return 0; - - if (cxl_num_decoders_committed(port) != id) { - dev_dbg(&port->dev, - "%s: out of order commit, expected decoder%d.%d\n", - dev_name(&cxld->dev), port->id, - cxl_num_decoders_committed(port)); - return -EBUSY; - } - - /* - * For endpoint decoders hosted on CXL memory devices that - * support the sanitize operation, make sure sanitize is not in-flight. - */ - if (is_endpoint_decoder(&cxld->dev)) { - struct cxl_endpoint_decoder *cxled = - to_cxl_endpoint_decoder(&cxld->dev); - struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_memdev_state *mds = - to_cxl_memdev_state(cxlmd->cxlds); - - if (mds && mds->security.sanitize_active) { - dev_dbg(&cxlmd->dev, - "attempted to commit %s during sanitize\n", - dev_name(&cxld->dev)); - return -EBUSY; - } - } - - down_read(&cxl_dpa_rwsem); /* common decoder settings */ ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id)); cxld_set_interleave(cxld, &ctrl); @@ -837,7 +812,47 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) } writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id)); - up_read(&cxl_dpa_rwsem); +} + +static int cxl_decoder_commit(struct cxl_decoder *cxld) +{ + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + int id = cxld->id, rc; + + if (cxld->flags & CXL_DECODER_F_ENABLE) + return 0; + + if (cxl_num_decoders_committed(port) != id) { + dev_dbg(&port->dev, + "%s: out of order commit, expected decoder%d.%d\n", + dev_name(&cxld->dev), port->id, + cxl_num_decoders_committed(port)); + return -EBUSY; + } + + /* + * For endpoint decoders hosted on CXL memory devices that + * support the sanitize operation, make sure sanitize is not in-flight. 
+ */ + if (is_endpoint_decoder(&cxld->dev)) { + struct cxl_endpoint_decoder *cxled = + to_cxl_endpoint_decoder(&cxld->dev); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_memdev_state *mds = + to_cxl_memdev_state(cxlmd->cxlds); + + if (mds && mds->security.sanitize_active) { + dev_dbg(&cxlmd->dev, + "attempted to commit %s during sanitize\n", + dev_name(&cxld->dev)); + return -EBUSY; + } + } + + scoped_guard(rwsem_read, &cxl_rwsem.dpa) + setup_hw_decoder(cxld, hdm); port->commit_end++; rc = cxld_await_commit(hdm, cxld->id); @@ -875,7 +890,7 @@ void cxl_port_commit_reap(struct cxl_decoder *cxld) { struct cxl_port *port = to_cxl_port(cxld->dev.parent); - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); /* * Once the highest committed decoder is disabled, free any other @@ -907,7 +922,6 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld) "%s: out of order reset, expected decoder%d.%d\n", dev_name(&cxld->dev), port->id, port->commit_end); - down_read(&cxl_dpa_rwsem); ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id)); ctrl &= ~CXL_HDM_DECODER0_CTRL_COMMIT; writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id)); @@ -916,7 +930,6 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld) writel(0, hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(id)); writel(0, hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(id)); writel(0, hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id)); - up_read(&cxl_dpa_rwsem); cxld->flags &= ~CXL_DECODER_F_ENABLE; @@ -1025,7 +1038,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, else cxld->target_type = CXL_DECODER_DEVMEM; - guard(rwsem_write)(&cxl_region_rwsem); + guard(rwsem_write)(&cxl_rwsem.region); if (cxld->id != cxl_num_decoders_committed(port)) { dev_warn(&port->dev, "decoder%d.%d: Committed out of order\n", diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 2689e6453c5a..fa6dd0c94656 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -899,6 +899,10 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, trace_cxl_generic_event(cxlmd, type, uuid, &evt->generic); return; } + if (event_type == CXL_CPER_EVENT_MEM_SPARING) { + trace_cxl_memory_sparing(cxlmd, type, &evt->mem_sparing); + return; + } if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) { u64 dpa, hpa = ULLONG_MAX, hpa_alias = ULLONG_MAX; @@ -909,8 +913,8 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, * translations. Take topology mutation locks and lookup * { HPA, REGION } from { DPA, MEMDEV } in the event record. 
*/ - guard(rwsem_read)(&cxl_region_rwsem); - guard(rwsem_read)(&cxl_dpa_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); + guard(rwsem_read)(&cxl_rwsem.dpa); dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK; cxlr = cxl_dpa_to_region(cxlmd, dpa); @@ -926,12 +930,30 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, if (cxl_store_rec_gen_media((struct cxl_memdev *)cxlmd, evt)) dev_dbg(&cxlmd->dev, "CXL store rec_gen_media failed\n"); + if (evt->gen_media.media_hdr.descriptor & + CXL_GMER_EVT_DESC_THRESHOLD_EVENT) + WARN_ON_ONCE((evt->gen_media.media_hdr.type & + CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE) && + !get_unaligned_le24(evt->gen_media.cme_count)); + else + WARN_ON_ONCE(evt->gen_media.media_hdr.type & + CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE); + trace_cxl_general_media(cxlmd, type, cxlr, hpa, hpa_alias, &evt->gen_media); } else if (event_type == CXL_CPER_EVENT_DRAM) { if (cxl_store_rec_dram((struct cxl_memdev *)cxlmd, evt)) dev_dbg(&cxlmd->dev, "CXL store rec_dram failed\n"); + if (evt->dram.media_hdr.descriptor & + CXL_GMER_EVT_DESC_THRESHOLD_EVENT) + WARN_ON_ONCE((evt->dram.media_hdr.type & + CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE) && + !get_unaligned_le24(evt->dram.cvme_count)); + else + WARN_ON_ONCE(evt->dram.media_hdr.type & + CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE); + trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias, &evt->dram); } @@ -952,6 +974,8 @@ static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd, ev_type = CXL_CPER_EVENT_DRAM; else if (uuid_equal(uuid, &CXL_EVENT_MEM_MODULE_UUID)) ev_type = CXL_CPER_EVENT_MEM_MODULE; + else if (uuid_equal(uuid, &CXL_EVENT_MEM_SPARING_UUID)) + ev_type = CXL_CPER_EVENT_MEM_SPARING; cxl_event_trace_record(cxlmd, type, ev_type, uuid, &record->event); } @@ -1265,7 +1289,7 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) /* synchronize with cxl_mem_probe() and decoder write operations */ guard(device)(&cxlmd->dev); endpoint = cxlmd->endpoint; - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); /* * Require an endpoint to be safe otherwise the driver can not * be sure that the device is unmapped. 
@@ -1401,8 +1425,8 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, int nr_records = 0; int rc; - rc = mutex_lock_interruptible(&mds->poison.lock); - if (rc) + ACQUIRE(mutex_intr, lock)(&mds->poison.mutex); + if ((rc = ACQUIRE_ERR(mutex_intr, &lock))) return rc; po = mds->poison.list_out; @@ -1437,7 +1461,6 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, } } while (po->flags & CXL_POISON_FLAG_MORE); - mutex_unlock(&mds->poison.lock); return rc; } EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, "CXL"); @@ -1473,7 +1496,7 @@ int cxl_poison_state_init(struct cxl_memdev_state *mds) return rc; } - mutex_init(&mds->poison.lock); + mutex_init(&mds->poison.mutex); return 0; } EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, "CXL"); diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index f88a13adf7fa..c569e00a511f 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -232,15 +232,13 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) if (!port || !is_cxl_endpoint(port)) return -EINVAL; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) return rc; - rc = down_read_interruptible(&cxl_dpa_rwsem); - if (rc) { - up_read(&cxl_region_rwsem); + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) return rc; - } if (cxl_num_decoders_committed(port) == 0) { /* No regions mapped to this memdev */ @@ -249,8 +247,6 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) /* Regions mapped, collect poison by endpoint */ rc = cxl_get_poison_by_endpoint(port); } - up_read(&cxl_dpa_rwsem); - up_read(&cxl_region_rwsem); return rc; } @@ -267,7 +263,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa) dev_dbg(cxlds->dev, "device has no dpa resource\n"); return -EINVAL; } - if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) { + if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa)) { dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n", dpa, &cxlds->dpa_res); return -EINVAL; @@ -292,19 +288,17 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) return rc; - rc = down_read_interruptible(&cxl_dpa_rwsem); - if (rc) { - up_read(&cxl_region_rwsem); + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) return rc; - } rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) - goto out; + return rc; inject.address = cpu_to_le64(dpa); mbox_cmd = (struct cxl_mbox_cmd) { @@ -314,7 +308,7 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) }; rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc) - goto out; + return rc; cxlr = cxl_dpa_to_region(cxlmd, dpa); if (cxlr) @@ -327,11 +321,8 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) .length = cpu_to_le32(1), }; trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT); -out: - up_read(&cxl_dpa_rwsem); - up_read(&cxl_region_rwsem); - return rc; + return 0; } EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, "CXL"); @@ -347,19 +338,17 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + 
ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) return rc; - rc = down_read_interruptible(&cxl_dpa_rwsem); - if (rc) { - up_read(&cxl_region_rwsem); + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) return rc; - } rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) - goto out; + return rc; /* * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command @@ -378,7 +367,7 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); if (rc) - goto out; + return rc; cxlr = cxl_dpa_to_region(cxlmd, dpa); if (cxlr) @@ -391,11 +380,8 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) .length = cpu_to_le32(1), }; trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR); -out: - up_read(&cxl_dpa_rwsem); - up_read(&cxl_region_rwsem); - return rc; + return 0; } EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, "CXL"); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index eb46c6764d20..29197376b18e 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -30,18 +30,12 @@ * instantiated by the core. */ -/* - * All changes to the interleave configuration occur with this lock held - * for write. - */ -DECLARE_RWSEM(cxl_region_rwsem); - static DEFINE_IDA(cxl_port_ida); static DEFINE_XARRAY(cxl_root_buses); int cxl_num_decoders_committed(struct cxl_port *port) { - lockdep_assert_held(&cxl_region_rwsem); + lockdep_assert_held(&cxl_rwsem.region); return port->commit_end + 1; } @@ -176,7 +170,7 @@ static ssize_t target_list_show(struct device *dev, ssize_t offset; int rc; - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); rc = emit_target_list(cxlsd, buf); if (rc < 0) return rc; @@ -196,7 +190,7 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr, struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; - /* without @cxl_dpa_rwsem, make sure @part is not reloaded */ + /* without @cxl_rwsem.dpa, make sure @part is not reloaded */ int part = READ_ONCE(cxled->part); const char *desc; @@ -235,7 +229,7 @@ static ssize_t dpa_resource_show(struct device *dev, struct device_attribute *at { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); - guard(rwsem_read)(&cxl_dpa_rwsem); + guard(rwsem_read)(&cxl_rwsem.dpa); return sysfs_emit(buf, "%#llx\n", (u64)cxl_dpa_resource_start(cxled)); } static DEVICE_ATTR_RO(dpa_resource); @@ -560,7 +554,7 @@ static ssize_t decoders_committed_show(struct device *dev, { struct cxl_port *port = to_cxl_port(dev); - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); return sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port)); } @@ -1722,7 +1716,7 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, if (xa_empty(&port->dports)) return -EINVAL; - guard(rwsem_write)(&cxl_region_rwsem); + guard(rwsem_write)(&cxl_rwsem.region); for (i = 0; i < cxlsd->cxld.interleave_ways; i++) { struct cxl_dport *dport = find_dport(port, target_map[i]); @@ -2001,12 +1995,9 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL"); static void cxld_unregister(void *dev) { - struct cxl_endpoint_decoder *cxled; - - if (is_endpoint_decoder(dev)) { - cxled = to_cxl_endpoint_decoder(dev); - cxl_decoder_kill_region(cxled); - } + if (is_endpoint_decoder(dev)) + cxl_decoder_detach(NULL, 
to_cxl_endpoint_decoder(dev), -1, + DETACH_INVALIDATE); device_unregister(dev); } @@ -2293,7 +2284,7 @@ static const struct attribute_group *cxl_bus_attribute_groups[] = { NULL, }; -struct bus_type cxl_bus_type = { +const struct bus_type cxl_bus_type = { .name = "cxl", .uevent = cxl_bus_uevent, .match = cxl_bus_match, diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index ba42259c3701..71cc42d05248 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -141,16 +141,12 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, struct cxl_region_params *p = &cxlr->params; ssize_t rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) return rc; if (cxlr->mode != CXL_PARTMODE_PMEM) - rc = sysfs_emit(buf, "\n"); - else - rc = sysfs_emit(buf, "%pUb\n", &p->uuid); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "\n"); + return sysfs_emit(buf, "%pUb\n", &p->uuid); } static int is_dup(struct device *match, void *data) @@ -162,7 +158,7 @@ static int is_dup(struct device *match, void *data) if (!is_cxl_region(match)) return 0; - lockdep_assert_held(&cxl_region_rwsem); + lockdep_assert_held(&cxl_rwsem.region); cxlr = to_cxl_region(match); p = &cxlr->params; @@ -192,27 +188,22 @@ static ssize_t uuid_store(struct device *dev, struct device_attribute *attr, if (uuid_is_null(&temp)) return -EINVAL; - rc = down_write_killable(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_write_kill, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, ®ion_rwsem))) return rc; if (uuid_equal(&p->uuid, &temp)) - goto out; + return len; - rc = -EBUSY; if (p->state >= CXL_CONFIG_ACTIVE) - goto out; + return -EBUSY; rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup); if (rc < 0) - goto out; + return rc; uuid_copy(&p->uuid, &temp); -out: - up_write(&cxl_region_rwsem); - if (rc) - return rc; return len; } static DEVICE_ATTR_RW(uuid); @@ -349,33 +340,40 @@ err: return rc; } -static ssize_t commit_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t len) +static int queue_reset(struct cxl_region *cxlr) { - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - bool commit; - ssize_t rc; + int rc; - rc = kstrtobool(buf, &commit); - if (rc) + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) return rc; - rc = down_write_killable(&cxl_region_rwsem); - if (rc) + /* Already in the requested state? */ + if (p->state < CXL_CONFIG_COMMIT) + return 0; + + p->state = CXL_CONFIG_RESET_PENDING; + + return 0; +} + +static int __commit(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + int rc; + + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) return rc; /* Already in the requested state? */ - if (commit && p->state >= CXL_CONFIG_COMMIT) - goto out; - if (!commit && p->state < CXL_CONFIG_COMMIT) - goto out; + if (p->state >= CXL_CONFIG_COMMIT) + return 0; /* Not ready to commit? 
*/ - if (commit && p->state < CXL_CONFIG_ACTIVE) { - rc = -ENXIO; - goto out; - } + if (p->state < CXL_CONFIG_ACTIVE) + return -ENXIO; /* * Invalidate caches before region setup to drop any speculative @@ -383,33 +381,61 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr, */ rc = cxl_region_invalidate_memregion(cxlr); if (rc) - goto out; + return rc; - if (commit) { - rc = cxl_region_decode_commit(cxlr); - if (rc == 0) - p->state = CXL_CONFIG_COMMIT; - } else { - p->state = CXL_CONFIG_RESET_PENDING; - up_write(&cxl_region_rwsem); - device_release_driver(&cxlr->dev); - down_write(&cxl_region_rwsem); + rc = cxl_region_decode_commit(cxlr); + if (rc) + return rc; - /* - * The lock was dropped, so need to revalidate that the reset is - * still pending. - */ - if (p->state == CXL_CONFIG_RESET_PENDING) { - cxl_region_decode_reset(cxlr, p->interleave_ways); - p->state = CXL_CONFIG_ACTIVE; - } - } + p->state = CXL_CONFIG_COMMIT; -out: - up_write(&cxl_region_rwsem); + return 0; +} +static ssize_t commit_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + bool commit; + ssize_t rc; + + rc = kstrtobool(buf, &commit); if (rc) return rc; + + if (commit) { + rc = __commit(cxlr); + if (rc) + return rc; + return len; + } + + rc = queue_reset(cxlr); + if (rc) + return rc; + + /* + * Unmap the region and depend the reset-pending state to ensure + * it does not go active again until post reset + */ + device_release_driver(&cxlr->dev); + + /* + * With the reset pending take cxl_rwsem.region unconditionally + * to ensure the reset gets handled before returning. + */ + guard(rwsem_write)(&cxl_rwsem.region); + + /* + * Revalidate that the reset is still pending in case another + * thread already handled this reset. 
+ */ + if (p->state == CXL_CONFIG_RESET_PENDING) { + cxl_region_decode_reset(cxlr, p->interleave_ways); + p->state = CXL_CONFIG_ACTIVE; + } + return len; } @@ -420,13 +446,10 @@ static ssize_t commit_show(struct device *dev, struct device_attribute *attr, struct cxl_region_params *p = &cxlr->params; ssize_t rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; - rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT); } static DEVICE_ATTR_RW(commit); @@ -450,15 +473,12 @@ static ssize_t interleave_ways_show(struct device *dev, { struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - ssize_t rc; + int rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; - rc = sysfs_emit(buf, "%d\n", p->interleave_ways); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "%d\n", p->interleave_ways); } static const struct attribute_group *get_cxl_region_target_group(void); @@ -493,23 +513,21 @@ static ssize_t interleave_ways_store(struct device *dev, return -EINVAL; } - rc = down_write_killable(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) return rc; - if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { - rc = -EBUSY; - goto out; - } + + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) + return -EBUSY; save = p->interleave_ways; p->interleave_ways = val; rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); - if (rc) + if (rc) { p->interleave_ways = save; -out: - up_write(&cxl_region_rwsem); - if (rc) return rc; + } + return len; } static DEVICE_ATTR_RW(interleave_ways); @@ -520,15 +538,12 @@ static ssize_t interleave_granularity_show(struct device *dev, { struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - ssize_t rc; + int rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; - rc = sysfs_emit(buf, "%d\n", p->interleave_granularity); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "%d\n", p->interleave_granularity); } static ssize_t interleave_granularity_store(struct device *dev, @@ -561,19 +576,15 @@ static ssize_t interleave_granularity_store(struct device *dev, if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity) return -EINVAL; - rc = down_write_killable(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) return rc; - if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { - rc = -EBUSY; - goto out; - } + + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) + return -EBUSY; p->interleave_granularity = val; -out: - up_write(&cxl_region_rwsem); - if (rc) - return rc; + return len; } static DEVICE_ATTR_RW(interleave_granularity); @@ -584,17 +595,15 @@ static ssize_t resource_show(struct device *dev, struct device_attribute *attr, struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; u64 resource = -1ULL; - ssize_t rc; + int rc; - rc = 
down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; + if (p->res) resource = p->res->start; - rc = sysfs_emit(buf, "%#llx\n", resource); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "%#llx\n", resource); } static DEVICE_ATTR_RO(resource); @@ -622,7 +631,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) struct resource *res; u64 remainder = 0; - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); /* Nothing to do... */ if (p->res && resource_size(p->res) == size) @@ -664,7 +673,7 @@ static void cxl_region_iomem_release(struct cxl_region *cxlr) struct cxl_region_params *p = &cxlr->params; if (device_is_registered(&cxlr->dev)) - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); if (p->res) { /* * Autodiscovered regions may not have been able to insert their @@ -681,7 +690,7 @@ static int free_hpa(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); if (!p->res) return 0; @@ -705,15 +714,14 @@ static ssize_t size_store(struct device *dev, struct device_attribute *attr, if (rc) return rc; - rc = down_write_killable(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) return rc; if (val) rc = alloc_hpa(cxlr, val); else rc = free_hpa(cxlr); - up_write(&cxl_region_rwsem); if (rc) return rc; @@ -729,15 +737,12 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr, u64 size = 0; ssize_t rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; if (p->res) size = resource_size(p->res); - rc = sysfs_emit(buf, "%#llx\n", size); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "%#llx\n", size); } static DEVICE_ATTR_RW(size); @@ -763,26 +768,20 @@ static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos) struct cxl_endpoint_decoder *cxled; int rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; if (pos >= p->interleave_ways) { dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, p->interleave_ways); - rc = -ENXIO; - goto out; + return -ENXIO; } cxled = p->targets[pos]; if (!cxled) - rc = sysfs_emit(buf, "\n"); - else - rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev)); -out: - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "\n"); + return sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev)); } static int check_commit_order(struct device *dev, void *data) @@ -897,7 +896,7 @@ cxl_port_pick_region_decoder(struct cxl_port *port, /* * This decoder is pinned registered as long as the endpoint decoder is * registered, and endpoint decoder unregistration holds the - * cxl_region_rwsem over unregister events, so no need to hold on to + * cxl_rwsem.region over unregister events, so no need to hold on to * this extra reference. 
*/ put_device(dev); @@ -1088,7 +1087,7 @@ static int cxl_port_attach_region(struct cxl_port *port, unsigned long index; int rc = -EBUSY; - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); cxl_rr = cxl_rr_load(port, cxlr); if (cxl_rr) { @@ -1198,7 +1197,7 @@ static void cxl_port_detach_region(struct cxl_port *port, struct cxl_region_ref *cxl_rr; struct cxl_ep *ep = NULL; - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); cxl_rr = cxl_rr_load(port, cxlr); if (!cxl_rr) @@ -2094,27 +2093,43 @@ static int cxl_region_attach(struct cxl_region *cxlr, return 0; } -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) +static struct cxl_region * +__cxl_decoder_detach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + enum cxl_detach_mode mode) { - struct cxl_port *iter, *ep_port = cxled_to_port(cxled); - struct cxl_region *cxlr = cxled->cxld.region; struct cxl_region_params *p; - int rc = 0; - lockdep_assert_held_write(&cxl_region_rwsem); + lockdep_assert_held_write(&cxl_rwsem.region); - if (!cxlr) - return 0; + if (!cxled) { + p = &cxlr->params; - p = &cxlr->params; - get_device(&cxlr->dev); + if (pos >= p->interleave_ways) { + dev_dbg(&cxlr->dev, "position %d out of range %d\n", + pos, p->interleave_ways); + return NULL; + } + + if (!p->targets[pos]) + return NULL; + cxled = p->targets[pos]; + } else { + cxlr = cxled->cxld.region; + if (!cxlr) + return NULL; + p = &cxlr->params; + } + + if (mode == DETACH_INVALIDATE) + cxled->part = -1; if (p->state > CXL_CONFIG_ACTIVE) { cxl_region_decode_reset(cxlr, p->interleave_ways); p->state = CXL_CONFIG_ACTIVE; } - for (iter = ep_port; !is_cxl_root(iter); + for (struct cxl_port *iter = cxled_to_port(cxled); !is_cxl_root(iter); iter = to_cxl_port(iter->dev.parent)) cxl_port_detach_region(iter, cxlr, cxled); @@ -2125,7 +2140,7 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), cxled->pos); - goto out; + return NULL; } if (p->state == CXL_CONFIG_ACTIVE) { @@ -2139,74 +2154,79 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) .end = -1, }; - /* notify the region driver that one of its targets has departed */ - up_write(&cxl_region_rwsem); - device_release_driver(&cxlr->dev); - down_write(&cxl_region_rwsem); -out: - put_device(&cxlr->dev); - return rc; + get_device(&cxlr->dev); + return cxlr; } -void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) +/* + * Cleanup a decoder's interest in a region. There are 2 cases to + * handle, removing an unknown @cxled from a known position in a region + * (detach_target()) or removing a known @cxled from an unknown @cxlr + * (cxld_unregister()) + * + * When the detachment finds a region release the region driver. 
+ */ +int cxl_decoder_detach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + enum cxl_detach_mode mode) { - down_write(&cxl_region_rwsem); - cxled->part = -1; - cxl_region_detach(cxled); - up_write(&cxl_region_rwsem); + struct cxl_region *detach; + + /* when the decoder is being destroyed lock unconditionally */ + if (mode == DETACH_INVALIDATE) { + guard(rwsem_write)(&cxl_rwsem.region); + detach = __cxl_decoder_detach(cxlr, cxled, pos, mode); + } else { + int rc; + + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + return rc; + detach = __cxl_decoder_detach(cxlr, cxled, pos, mode); + } + + if (detach) { + device_release_driver(&detach->dev); + put_device(&detach->dev); + } + return 0; +} + +static int __attach_target(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + unsigned int state) +{ + int rc; + + if (state == TASK_INTERRUPTIBLE) { + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + return rc; + guard(rwsem_read)(&cxl_rwsem.dpa); + return cxl_region_attach(cxlr, cxled, pos); + } + guard(rwsem_write)(&cxl_rwsem.region); + guard(rwsem_read)(&cxl_rwsem.dpa); + return cxl_region_attach(cxlr, cxled, pos); } static int attach_target(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled, int pos, unsigned int state) { - int rc = 0; + int rc = __attach_target(cxlr, cxled, pos, state); - if (state == TASK_INTERRUPTIBLE) - rc = down_write_killable(&cxl_region_rwsem); - else - down_write(&cxl_region_rwsem); - if (rc) - return rc; - - down_read(&cxl_dpa_rwsem); - rc = cxl_region_attach(cxlr, cxled, pos); - up_read(&cxl_dpa_rwsem); - up_write(&cxl_region_rwsem); - - if (rc) - dev_warn(cxled->cxld.dev.parent, - "failed to attach %s to %s: %d\n", - dev_name(&cxled->cxld.dev), dev_name(&cxlr->dev), rc); + if (rc == 0) + return 0; + dev_warn(cxled->cxld.dev.parent, "failed to attach %s to %s: %d\n", + dev_name(&cxled->cxld.dev), dev_name(&cxlr->dev), rc); return rc; } static int detach_target(struct cxl_region *cxlr, int pos) { - struct cxl_region_params *p = &cxlr->params; - int rc; - - rc = down_write_killable(&cxl_region_rwsem); - if (rc) - return rc; - - if (pos >= p->interleave_ways) { - dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, - p->interleave_ways); - rc = -ENXIO; - goto out; - } - - if (!p->targets[pos]) { - rc = 0; - goto out; - } - - rc = cxl_region_detach(p->targets[pos]); -out: - up_write(&cxl_region_rwsem); - return rc; + return cxl_decoder_detach(cxlr, NULL, pos, DETACH_ONLY); } static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos, @@ -2460,7 +2480,7 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, return NOTIFY_DONE; /* - * No need to hold cxl_region_rwsem; region parameters are stable + * No need to hold cxl_rwsem.region; region parameters are stable * within the cxl_region driver. */ region_nid = phys_to_target_node(cxlr->params.res->start); @@ -2483,7 +2503,7 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb, int region_nid; /* - * No need to hold cxl_region_rwsem; region parameters are stable + * No need to hold cxl_rwsem.region; region parameters are stable * within the cxl_region driver. 
*/ region_nid = phys_to_target_node(cxlr->params.res->start); @@ -2632,17 +2652,13 @@ static ssize_t region_show(struct device *dev, struct device_attribute *attr, struct cxl_decoder *cxld = to_cxl_decoder(dev); ssize_t rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) return rc; if (cxld->region) - rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev)); - else - rc = sysfs_emit(buf, "\n"); - up_read(&cxl_region_rwsem); - - return rc; + return sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev)); + return sysfs_emit(buf, "\n"); } DEVICE_ATTR_RO(region); @@ -2847,7 +2863,7 @@ static int __cxl_dpa_to_region(struct device *dev, void *arg) if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res)) return 0; - if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start) + if (!cxl_resource_contains_addr(cxled->dpa_res, dpa)) return 0; /* @@ -2959,7 +2975,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, if (cxlrd->hpa_to_spa) hpa = cxlrd->hpa_to_spa(cxlrd, hpa); - if (hpa < p->res->start || hpa > p->res->end) { + if (!cxl_resource_contains_addr(p->res, hpa)) { dev_dbg(&cxlr->dev, "Addr trans fail: hpa 0x%llx not in region\n", hpa); return ULLONG_MAX; @@ -2981,7 +2997,7 @@ static int cxl_pmem_region_alloc(struct cxl_region *cxlr) struct device *dev; int i; - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); if (p->state != CXL_CONFIG_COMMIT) return -ENXIO; @@ -2993,7 +3009,7 @@ static int cxl_pmem_region_alloc(struct cxl_region *cxlr) cxlr_pmem->hpa_range.start = p->res->start; cxlr_pmem->hpa_range.end = p->res->end; - /* Snapshot the region configuration underneath the cxl_region_rwsem */ + /* Snapshot the region configuration underneath the cxl_rwsem.region */ cxlr_pmem->nr_mappings = p->nr_targets; for (i = 0; i < p->nr_targets; i++) { struct cxl_endpoint_decoder *cxled = p->targets[i]; @@ -3070,7 +3086,7 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) struct cxl_dax_region *cxlr_dax; struct device *dev; - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); if (p->state != CXL_CONFIG_COMMIT) return ERR_PTR(-ENXIO); @@ -3270,7 +3286,7 @@ static int match_region_by_range(struct device *dev, const void *data) cxlr = to_cxl_region(dev); p = &cxlr->params; - guard(rwsem_read)(&cxl_region_rwsem); + guard(rwsem_read)(&cxl_rwsem.region); if (p->res && p->res->start == r->start && p->res->end == r->end) return 1; @@ -3282,15 +3298,10 @@ static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_region_params *p = &cxlr->params; - int nid = phys_to_target_node(res->start); resource_size_t size = resource_size(res); resource_size_t cache_size, start; - int rc; - - rc = cxl_acpi_get_extended_linear_cache_size(res, nid, &cache_size); - if (rc) - return rc; + cache_size = cxlrd->cache_size; if (!cache_size) return 0; @@ -3330,7 +3341,7 @@ static int __construct_region(struct cxl_region *cxlr, struct resource *res; int rc; - guard(rwsem_write)(&cxl_region_rwsem); + guard(rwsem_write)(&cxl_rwsem.region); p = &cxlr->params; if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { dev_err(cxlmd->dev.parent, @@ -3466,10 +3477,10 @@ int cxl_add_to_region(struct cxl_endpoint_decoder *cxled) attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE); - down_read(&cxl_region_rwsem); - p = 
&cxlr->params; - attach = p->state == CXL_CONFIG_COMMIT; - up_read(&cxl_region_rwsem); + scoped_guard(rwsem_read, &cxl_rwsem.region) { + p = &cxlr->params; + attach = p->state == CXL_CONFIG_COMMIT; + } if (attach) { /* @@ -3494,12 +3505,12 @@ u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa) if (!endpoint) return ~0ULL; - guard(rwsem_write)(&cxl_region_rwsem); + guard(rwsem_write)(&cxl_rwsem.region); xa_for_each(&endpoint->regions, index, iter) { struct cxl_region_params *p = &iter->region->params; - if (p->res->start <= spa && spa <= p->res->end) { + if (cxl_resource_contains_addr(p->res, spa)) { if (!p->cache_size) return ~0ULL; @@ -3531,40 +3542,45 @@ static void shutdown_notifiers(void *_cxlr) unregister_mt_adistance_algorithm(&cxlr->adist_notifier); } -static int cxl_region_probe(struct device *dev) +static int cxl_region_can_probe(struct cxl_region *cxlr) { - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; int rc; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) { + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) { dev_dbg(&cxlr->dev, "probe interrupted\n"); return rc; } if (p->state < CXL_CONFIG_COMMIT) { dev_dbg(&cxlr->dev, "config state: %d\n", p->state); - rc = -ENXIO; - goto out; + return -ENXIO; } if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) { dev_err(&cxlr->dev, "failed to activate, re-commit region and retry\n"); - rc = -ENXIO; - goto out; + return -ENXIO; } + return 0; +} + +static int cxl_region_probe(struct device *dev) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + int rc; + + rc = cxl_region_can_probe(cxlr); + if (rc) + return rc; + /* * From this point on any path that changes the region's state away from * CXL_CONFIG_COMMIT is also responsible for releasing the driver. 
*/ -out: - up_read(&cxl_region_rwsem); - - if (rc) - return rc; cxlr->node_notifier.notifier_call = cxl_region_perf_attrs_callback; cxlr->node_notifier.priority = CXL_CALLBACK_PRI; diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 25ebfbc1616c..a53ec4798b12 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -214,12 +214,16 @@ TRACE_EVENT(cxl_overflow, #define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED BIT(4) #define CXL_EVENT_RECORD_FLAG_HW_REPLACE BIT(5) #define CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID BIT(6) +#define CXL_EVENT_RECORD_FLAG_LD_ID_VALID BIT(7) +#define CXL_EVENT_RECORD_FLAG_HEAD_ID_VALID BIT(8) #define show_hdr_flags(flags) __print_flags(flags, " | ", \ { CXL_EVENT_RECORD_FLAG_PERMANENT, "PERMANENT_CONDITION" }, \ { CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, "MAINTENANCE_NEEDED" }, \ { CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, "PERFORMANCE_DEGRADED" }, \ { CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" }, \ - { CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID, "MAINT_OP_SUB_CLASS_VALID" } \ + { CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID, "MAINT_OP_SUB_CLASS_VALID" }, \ + { CXL_EVENT_RECORD_FLAG_LD_ID_VALID, "LD_ID_VALID" }, \ + { CXL_EVENT_RECORD_FLAG_HEAD_ID_VALID, "HEAD_ID_VALID" } \ ) /* @@ -247,7 +251,9 @@ TRACE_EVENT(cxl_overflow, __field(u64, hdr_timestamp) \ __field(u8, hdr_length) \ __field(u8, hdr_maint_op_class) \ - __field(u8, hdr_maint_op_sub_class) + __field(u8, hdr_maint_op_sub_class) \ + __field(u16, hdr_ld_id) \ + __field(u8, hdr_head_id) #define CXL_EVT_TP_fast_assign(cxlmd, l, hdr) \ __assign_str(memdev); \ @@ -260,18 +266,22 @@ TRACE_EVENT(cxl_overflow, __entry->hdr_related_handle = le16_to_cpu((hdr).related_handle); \ __entry->hdr_timestamp = le64_to_cpu((hdr).timestamp); \ __entry->hdr_maint_op_class = (hdr).maint_op_class; \ - __entry->hdr_maint_op_sub_class = (hdr).maint_op_sub_class + __entry->hdr_maint_op_sub_class = (hdr).maint_op_sub_class; \ + __entry->hdr_ld_id = le16_to_cpu((hdr).ld_id); \ + __entry->hdr_head_id = (hdr).head_id #define CXL_EVT_TP_printk(fmt, ...) 
\ TP_printk("memdev=%s host=%s serial=%lld log=%s : time=%llu uuid=%pUb " \ "len=%d flags='%s' handle=%x related_handle=%x " \ - "maint_op_class=%u maint_op_sub_class=%u : " fmt, \ + "maint_op_class=%u maint_op_sub_class=%u " \ + "ld_id=%x head_id=%x : " fmt, \ __get_str(memdev), __get_str(host), __entry->serial, \ cxl_event_log_type_str(__entry->log), \ __entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\ show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \ __entry->hdr_related_handle, __entry->hdr_maint_op_class, \ __entry->hdr_maint_op_sub_class, \ + __entry->hdr_ld_id, __entry->hdr_head_id, \ ##__VA_ARGS__) TRACE_EVENT(cxl_generic_event, @@ -496,7 +506,10 @@ TRACE_EVENT(cxl_general_media, uuid_copy(&__entry->region_uuid, &uuid_null); } __entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags; - __entry->cme_count = get_unaligned_le24(rec->cme_count); + if (rec->media_hdr.descriptor & CXL_GMER_EVT_DESC_THRESHOLD_EVENT) + __entry->cme_count = get_unaligned_le24(rec->cme_count); + else + __entry->cme_count = 0; ), CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \ @@ -648,7 +661,10 @@ TRACE_EVENT(cxl_dram, CXL_EVENT_GEN_MED_COMP_ID_SIZE); __entry->sub_channel = rec->sub_channel; __entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags; - __entry->cvme_count = get_unaligned_le24(rec->cvme_count); + if (rec->media_hdr.descriptor & CXL_GMER_EVT_DESC_THRESHOLD_EVENT) + __entry->cvme_count = get_unaligned_le24(rec->cvme_count); + else + __entry->cvme_count = 0; ), CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' sub_type='%s' " \ @@ -871,6 +887,111 @@ TRACE_EVENT(cxl_memory_module, ) ); +/* + * Memory Sparing Event Record - MSER + * + * CXL rev 3.2 section 8.2.10.2.1.4; Table 8-60 + */ +#define CXL_MSER_QUERY_RESOURCE_FLAG BIT(0) +#define CXL_MSER_HARD_SPARING_FLAG BIT(1) +#define CXL_MSER_DEV_INITED_FLAG BIT(2) +#define show_mem_sparing_flags(flags) __print_flags(flags, "|", \ + { CXL_MSER_QUERY_RESOURCE_FLAG, "Query Resources" }, \ + { CXL_MSER_HARD_SPARING_FLAG, "Hard Sparing" }, \ + { CXL_MSER_DEV_INITED_FLAG, "Device Initiated Sparing" } \ +) + +#define CXL_MSER_VALID_CHANNEL BIT(0) +#define CXL_MSER_VALID_RANK BIT(1) +#define CXL_MSER_VALID_NIBBLE BIT(2) +#define CXL_MSER_VALID_BANK_GROUP BIT(3) +#define CXL_MSER_VALID_BANK BIT(4) +#define CXL_MSER_VALID_ROW BIT(5) +#define CXL_MSER_VALID_COLUMN BIT(6) +#define CXL_MSER_VALID_COMPONENT_ID BIT(7) +#define CXL_MSER_VALID_COMPONENT_ID_FORMAT BIT(8) +#define CXL_MSER_VALID_SUB_CHANNEL BIT(9) +#define show_mem_sparing_valid_flags(flags) __print_flags(flags, "|", \ + { CXL_MSER_VALID_CHANNEL, "CHANNEL" }, \ + { CXL_MSER_VALID_RANK, "RANK" }, \ + { CXL_MSER_VALID_NIBBLE, "NIBBLE" }, \ + { CXL_MSER_VALID_BANK_GROUP, "BANK GROUP" }, \ + { CXL_MSER_VALID_BANK, "BANK" }, \ + { CXL_MSER_VALID_ROW, "ROW" }, \ + { CXL_MSER_VALID_COLUMN, "COLUMN" }, \ + { CXL_MSER_VALID_COMPONENT_ID, "COMPONENT ID" }, \ + { CXL_MSER_VALID_COMPONENT_ID_FORMAT, "COMPONENT ID PLDM FORMAT" }, \ + { CXL_MSER_VALID_SUB_CHANNEL, "SUB CHANNEL" } \ +) + +TRACE_EVENT(cxl_memory_sparing, + + TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, + struct cxl_event_mem_sparing *rec), + + TP_ARGS(cxlmd, log, rec), + + TP_STRUCT__entry( + CXL_EVT_TP_entry + + /* Memory Sparing Event */ + __field(u8, flags) + __field(u8, result) + __field(u16, validity_flags) + __field(u16, res_avail) + __field(u8, channel) + __field(u8, rank) + __field(u32, nibble_mask) + __field(u8, bank_group) + __field(u8, bank) + __field(u32, row) + 
__field(u16, column) + __field(u8, sub_channel) + __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE) + ), + + TP_fast_assign( + CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + __entry->hdr_uuid = CXL_EVENT_MEM_SPARING_UUID; + + /* Memory Sparing Event */ + __entry->flags = rec->flags; + __entry->result = rec->result; + __entry->validity_flags = le16_to_cpu(rec->validity_flags); + __entry->res_avail = le16_to_cpu(rec->res_avail); + __entry->channel = rec->channel; + __entry->rank = rec->rank; + __entry->nibble_mask = get_unaligned_le24(rec->nibble_mask); + __entry->bank_group = rec->bank_group; + __entry->bank = rec->bank; + __entry->row = get_unaligned_le24(rec->row); + __entry->column = le16_to_cpu(rec->column); + __entry->sub_channel = rec->sub_channel; + memcpy(__entry->comp_id, &rec->component_id, + CXL_EVENT_GEN_MED_COMP_ID_SIZE); + ), + + CXL_EVT_TP_printk("flags='%s' result=%u validity_flags='%s' " \ + "spare resource avail=%u channel=%u rank=%u " \ + "nibble_mask=%x bank_group=%u bank=%u " \ + "row=%u column=%u sub_channel=%u " \ + "comp_id=%s comp_id_pldm_valid_flags='%s' " \ + "pldm_entity_id=%s pldm_resource_id=%s", + show_mem_sparing_flags(__entry->flags), + __entry->result, + show_mem_sparing_valid_flags(__entry->validity_flags), + __entry->res_avail, __entry->channel, __entry->rank, + __entry->nibble_mask, __entry->bank_group, __entry->bank, + __entry->row, __entry->column, __entry->sub_channel, + __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE), + show_comp_id_pldm_flags(__entry->comp_id[0]), + show_pldm_entity_id(__entry->validity_flags, CXL_MSER_VALID_COMPONENT_ID, + CXL_MSER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), + show_pldm_resource_id(__entry->validity_flags, CXL_MSER_VALID_COMPONENT_ID, + CXL_MSER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id) + ) +); + #define show_poison_trace_type(type) \ __print_symbolic(type, \ { CXL_POISON_TRACE_LIST, "List" }, \ diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index ad863572ddb7..847e37be42c4 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -424,6 +424,7 @@ typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); /** * struct cxl_root_decoder - Static platform CXL address decoder * @res: host / parent resource for region allocations + * @cache_size: extended linear cache size if exists, otherwise zero. * @region_id: region id for next region provisioning event * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address * @platform_data: platform specific configuration data @@ -433,6 +434,7 @@ typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); */ struct cxl_root_decoder { struct resource *res; + resource_size_t cache_size; atomic_t region_id; cxl_hpa_to_spa_fn hpa_to_spa; void *platform_data; @@ -470,7 +472,7 @@ enum cxl_config_state { * @nr_targets: number of targets * @cache_size: extended linear cache size if exists, otherwise zero. 
* - * State transitions are protected by the cxl_region_rwsem + * State transitions are protected by cxl_rwsem.region */ struct cxl_region_params { enum cxl_config_state state; @@ -816,7 +818,7 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, bool is_cxl_region(struct device *dev); -extern struct bus_type cxl_bus_type; +extern const struct bus_type cxl_bus_type; struct cxl_driver { const char *name; @@ -913,15 +915,4 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #endif u16 cxl_gpf_get_dvsec(struct device *dev); - -static inline struct rw_semaphore *rwsem_read_intr_acquire(struct rw_semaphore *rwsem) -{ - if (down_read_interruptible(rwsem)) - return NULL; - - return rwsem; -} - -DEFINE_FREE(rwsem_read_release, struct rw_semaphore *, if (_T) up_read(_T)) - #endif /* __CXL_H__ */ diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 551b0ba2caa1..751478dfc410 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -254,7 +254,7 @@ enum security_cmd_enabled_bits { * @max_errors: Maximum media error records held in device cache * @enabled_cmds: All poison commands enabled in the CEL * @list_out: The poison list payload returned by device - * @lock: Protect reads of the poison list + * @mutex: Protect reads of the poison list * * Reads of the poison list are synchronized to ensure that a reader * does not get an incomplete list because their request overlapped @@ -265,7 +265,7 @@ struct cxl_poison_state { u32 max_errors; DECLARE_BITMAP(enabled_cmds, CXL_POISON_ENABLED_MAX); struct cxl_mbox_poison_out *list_out; - struct mutex lock; /* Protect reads of poison list */ + struct mutex mutex; /* Protect reads of poison list */ }; /* @@ -634,6 +634,14 @@ struct cxl_mbox_identify { 0x13, 0xb7, 0x74) /* + * Memory Sparing Event Record UUID + * CXL rev 3.2 section 8.2.10.2.1.4: Table 8-60 + */ +#define CXL_EVENT_MEM_SPARING_UUID \ + UUID_INIT(0xe71f3a40, 0x2d29, 0x4092, 0x8a, 0x39, 0x4d, 0x1c, 0x96, \ + 0x6c, 0x7c, 0x65) + +/* * Get Event Records output payload * CXL rev 3.0 section 8.2.9.2.2; Table 8-50 */ diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 785aa2af5eaa..bd100ac31672 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -379,7 +379,7 @@ static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox, { int rc; - mutex_lock_io(&cxl_mbox->mbox_mutex); + mutex_lock(&cxl_mbox->mbox_mutex); rc = __cxl_pci_mbox_send_cmd(cxl_mbox, cmd); mutex_unlock(&cxl_mbox->mbox_mutex); diff --git a/include/cxl/event.h b/include/cxl/event.h index f9ae1796da85..6fd90f9cc203 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -19,7 +19,9 @@ struct cxl_event_record_hdr { __le64 timestamp; u8 maint_op_class; u8 maint_op_sub_class; - u8 reserved[14]; + __le16 ld_id; + u8 head_id; + u8 reserved[11]; } __packed; struct cxl_event_media_hdr { @@ -108,11 +110,43 @@ struct cxl_event_mem_module { u8 reserved[0x2a]; } __packed; +/* + * Memory Sparing Event Record - MSER + * CXL rev 3.2 section 8.2.10.2.1.4; Table 8-60 + */ +struct cxl_event_mem_sparing { + struct cxl_event_record_hdr hdr; + /* + * The fields maintenance operation class and maintenance operation + * subclass defined in the Memory Sparing Event Record are the + * duplication of the same in the common event record. Thus defined + * as reserved and to be removed after the spec correction. 
+ */ + u8 rsv1; + u8 rsv2; + u8 flags; + u8 result; + __le16 validity_flags; + u8 reserved1[6]; + __le16 res_avail; + u8 channel; + u8 rank; + u8 nibble_mask[3]; + u8 bank_group; + u8 bank; + u8 row[3]; + __le16 column; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 sub_channel; + u8 reserved2[0x25]; +} __packed; + union cxl_event { struct cxl_event_generic generic; struct cxl_event_gen_media gen_media; struct cxl_event_dram dram; struct cxl_event_mem_module mem_module; + struct cxl_event_mem_sparing mem_sparing; /* dram & gen_media event header */ struct cxl_event_media_hdr media_hdr; } __packed; @@ -131,6 +165,7 @@ enum cxl_event_type { CXL_CPER_EVENT_GEN_MEDIA, CXL_CPER_EVENT_DRAM, CXL_CPER_EVENT_MEM_MODULE, + CXL_CPER_EVENT_MEM_SPARING, }; #define CPER_CXL_DEVICE_ID_VALID BIT(0) diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index bee606bebaca..2573585b7f06 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -3,6 +3,8 @@ #define _LINUX_CLEANUP_H #include <linux/compiler.h> +#include <linux/err.h> +#include <linux/args.h> /** * DOC: scope-based cleanup helpers @@ -61,9 +63,20 @@ * Observe the lock is held for the remainder of the "if ()" block not * the remainder of "func()". * - * Now, when a function uses both __free() and guard(), or multiple - * instances of __free(), the LIFO order of variable definition order - * matters. GCC documentation says: + * The ACQUIRE() macro can be used in all places that guard() can be + * used and additionally support conditional locks:: + * + * DEFINE_GUARD_COND(pci_dev, _try, pci_dev_trylock(_T)) + * ... + * ACQUIRE(pci_dev_try, lock)(dev); + * rc = ACQUIRE_ERR(pci_dev_try, &lock); + * if (rc) + * return rc; + * // @lock is held + * + * Now, when a function uses both __free() and guard()/ACQUIRE(), or + * multiple instances of __free(), the LIFO order of variable definition + * order matters. GCC documentation says: * * "When multiple variables in the same scope have cleanup attributes, * at exit from the scope their associated cleanup functions are run in @@ -313,14 +326,46 @@ _label: \ * acquire fails. * * Only for conditional locks. + * + * ACQUIRE(name, var): + * a named instance of the (guard) class, suitable for conditional + * locks when paired with ACQUIRE_ERR(). + * + * ACQUIRE_ERR(name, &var): + * a helper that is effectively a PTR_ERR() conversion of the guard + * pointer. Returns 0 when the lock was acquired and a negative + * error code otherwise. 
*/ #define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond -#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ - static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ - { return (void *)(__force unsigned long)*(_exp); } +#define __GUARD_IS_ERR(_ptr) \ + ({ \ + unsigned long _rc = (__force unsigned long)(_ptr); \ + unlikely((_rc - 1) >= -MAX_ERRNO - 1); \ + }) + +#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ + static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { \ + void *_ptr = (void *)(__force unsigned long)*(_exp); \ + if (IS_ERR(_ptr)) { \ + _ptr = NULL; \ + } \ + return _ptr; \ + } \ + static inline int class_##_name##_lock_err(class_##_name##_t *_T) \ + { \ + long _rc = (__force unsigned long)*(_exp); \ + if (!_rc) { \ + _rc = -EBUSY; \ + } \ + if (!IS_ERR_VALUE(_rc)) { \ + _rc = 0; \ + } \ + return _rc; \ + } #define DEFINE_CLASS_IS_GUARD(_name) \ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ @@ -331,23 +376,37 @@ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond __DEFINE_GUARD_LOCK_PTR(_name, _T) #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ - DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ + DEFINE_CLASS(_name, _type, if (!__GUARD_IS_ERR(_T)) { _unlock; }, ({ _lock; _T; }), _type _T); \ DEFINE_CLASS_IS_GUARD(_name) -#define DEFINE_GUARD_COND(_name, _ext, _condlock) \ +#define DEFINE_GUARD_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ - ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ + ({ void *_t = _T; int _RET = (_lock); if (_T && !(_cond)) _t = ERR_PTR(_RET); _t; }), \ class_##_name##_t _T) \ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ - { return class_##_name##_lock_ptr(_T); } + { return class_##_name##_lock_ptr(_T); } \ + static inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \ + { return class_##_name##_lock_err(_T); } + +/* + * Default binary condition; success on 'true'. + */ +#define DEFINE_GUARD_COND_3(_name, _ext, _lock) \ + DEFINE_GUARD_COND_4(_name, _ext, _lock, _RET) + +#define DEFINE_GUARD_COND(X...) CONCATENATE(DEFINE_GUARD_COND_, COUNT_ARGS(X))(X) #define guard(_name) \ CLASS(_name, __UNIQUE_ID(guard)) #define __guard_ptr(_name) class_##_name##_lock_ptr +#define __guard_err(_name) class_##_name##_lock_err #define __is_cond_ptr(_name) class_##_name##_is_conditional +#define ACQUIRE(_name, _var) CLASS(_name, _var) +#define ACQUIRE_ERR(_name, _var) __guard_err(_name)(_var) + /* * Helper macro for scoped_guard(). 
* @@ -409,7 +468,7 @@ typedef struct { \ \ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ { \ - if (_T->lock) { _unlock; } \ + if (!__GUARD_IS_ERR(_T->lock)) { _unlock; } \ } \ \ __DEFINE_GUARD_LOCK_PTR(_name, &_T->lock) @@ -441,15 +500,22 @@ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) -#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ +#define DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ - if (_T->lock && !(_condlock)) _T->lock = NULL; \ + int _RET = (_lock); \ + if (_T->lock && !(_cond)) _T->lock = ERR_PTR(_RET);\ _t; }), \ typeof_member(class_##_name##_t, lock) l) \ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ - { return class_##_name##_lock_ptr(_T); } + { return class_##_name##_lock_ptr(_T); } \ + static inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \ + { return class_##_name##_lock_err(_T); } + +#define DEFINE_LOCK_GUARD_1_COND_3(_name, _ext, _lock) \ + DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _RET) +#define DEFINE_LOCK_GUARD_1_COND(X...) CONCATENATE(DEFINE_LOCK_GUARD_1_COND_, COUNT_ARGS(X))(X) #endif /* _LINUX_CLEANUP_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 00afd341d293..847b81ca6436 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -227,7 +227,7 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) -DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) +DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T), _RET == 0) extern unsigned long mutex_get_owner(struct mutex *lock); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index c8b543d428b0..cbafdc12e743 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -240,10 +240,11 @@ extern void up_write(struct rw_semaphore *sem); DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) -DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) +DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T), _RET == 0) DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) +DEFINE_GUARD_COND(rwsem_write, _kill, down_write_killable(_T), _RET == 0) /* * downgrade write lock to read lock diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 31a2d73c963f..d07f14cb7aa4 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -62,7 +62,6 @@ cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-y += $(CXL_CORE_SRC)/cdat.o cxl_core-y += $(CXL_CORE_SRC)/ras.o -cxl_core-y += $(CXL_CORE_SRC)/acpi.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o diff --git a/tools/testing/cxl/config_check.c b/tools/testing/cxl/config_check.c index 0902c5d6e410..a80bc2c062fe 100644 --- a/tools/testing/cxl/config_check.c +++ b/tools/testing/cxl/config_check.c @@ -14,4 +14,5 @@ void check(void) BUILD_BUG_ON(!IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)); 
BUILD_BUG_ON(!IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)); BUILD_BUG_ON(!IS_ENABLED(CONFIG_DEBUG_FS)); + BUILD_BUG_ON(!IS_ENABLED(CONFIG_MEMORY_HOTPLUG)); } diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 8a5815ca870d..6a25cca5636f 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -2,6 +2,7 @@ // Copyright(c) 2021 Intel Corporation. All rights reserved. #include <linux/platform_device.h> +#include <linux/memory_hotplug.h> #include <linux/genalloc.h> #include <linux/module.h> #include <linux/mutex.h> @@ -1328,6 +1329,7 @@ err_mem: static __init int cxl_test_init(void) { int rc, i; + struct range mappable; cxl_acpi_test(); cxl_core_test(); @@ -1342,8 +1344,11 @@ static __init int cxl_test_init(void) rc = -ENOMEM; goto err_gen_pool_create; } + mappable = mhp_get_pluggable_range(true); - rc = gen_pool_add(cxl_mock_pool, iomem_resource.end + 1 - SZ_64G, + rc = gen_pool_add(cxl_mock_pool, + min(iomem_resource.end + 1 - SZ_64G, + mappable.end + 1 - SZ_64G), SZ_64G, NUMA_NO_NODE); if (rc) goto err_gen_pool_add;
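
Note: the conversion pattern used throughout the region.c hunks above (e.g. region_show() and cxl_region_can_probe()) can be summarized with a small sketch. This is illustrative only and not part of the patch; demo_lock and demo_show() are hypothetical names, but the ACQUIRE()/ACQUIRE_ERR() calls follow exactly the usage this series introduces:

#include <linux/cleanup.h>
#include <linux/rwsem.h>
#include <linux/device.h>
#include <linux/sysfs.h>

/*
 * Sketch of ACQUIRE()/ACQUIRE_ERR() with a conditional (interruptible)
 * rwsem read lock. ACQUIRE() declares a scoped guard that attempts the
 * lock; ACQUIRE_ERR() returns 0 on success or the negative error from
 * the failed acquisition. A held lock is dropped automatically at scope
 * exit, which is what removes the out:/up_read() gotos in the hunks above.
 */
static DECLARE_RWSEM(demo_lock);	/* hypothetical lock */

static ssize_t demo_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	ssize_t rc;

	ACQUIRE(rwsem_read_intr, lock)(&demo_lock);
	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &lock)))
		return rc;	/* e.g. -EINTR; no lock held, no unlock needed */

	/* demo_lock held for the remainder of the function */
	return sysfs_emit(buf, "%d\n", 42);
}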
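
The companion cleanup.h change extends DEFINE_GUARD_COND()/DEFINE_LOCK_GUARD_1_COND() with an optional fourth argument naming the success condition, so primitives that return 0 on success can preserve their real error code for ACQUIRE_ERR() instead of collapsing every failure to NULL. A sketch under the same caveat (demo_update() is hypothetical; the quoted definition is from the rwsem.h hunk above):

/*
 * The 4-argument form binds the lock expression's return value to _RET
 * and treats the trailing condition as success; on failure the guard
 * records ERR_PTR(_RET) for later retrieval via ACQUIRE_ERR():
 *
 *   DEFINE_GUARD_COND(rwsem_write, _kill, down_write_killable(_T), _RET == 0)
 *
 * A hypothetical caller of the resulting class then reads:
 */
static int demo_update(struct rw_semaphore *sem, int *val)
{
	int rc;

	ACQUIRE(rwsem_write_kill, lock)(sem);
	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &lock)))
		return rc;	/* -EINTR when fatally signalled while waiting */

	(*val)++;		/* @sem held here; released at function exit */
	return 0;
}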