Diffstat (limited to 'drivers')
75 files changed, 3765 insertions, 747 deletions
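Before the per-file hunks, a consumer-side sketch of the firmware-node reference API that the drivers/acpi/property.c change below extends: __acpi_node_get_property_reference() becomes a thin wrapper around a new acpi_fwnode_get_reference_args(), so a @nargs_prop (a property in the *referenced* node that states the argument count) is now honored on ACPI, not just OF — which is also why the "Only relevant on OF" note is dropped from drivers/base/property.c. This is a hedged illustration only; "example-phys" and "#example-cells" are made-up names, not part of the patch.

/*
 * Hedged sketch (not from the patch): a driver resolving a reference
 * whose argument count is declared by the referenced node itself.
 */
#include <linux/printk.h>
#include <linux/property.h>

static int example_parse_ref(struct fwnode_handle *fwnode)
{
	struct fwnode_reference_args args;
	int ret;

	/*
	 * With this series, passing @nargs_prop works on ACPI too: the
	 * argument count is read from "#example-cells" in the referenced
	 * node, and the fixed-count parameter (0 here) is ignored.
	 */
	ret = fwnode_property_get_reference_args(fwnode, "example-phys",
						 "#example-cells", 0, 0,
						 &args);
	if (ret)
		return ret;

	pr_info("ref has %u args, arg[0]=%llu\n", args.nargs,
		args.nargs ? args.args[0] : 0);
	fwnode_handle_put(args.fwnode);
	return 0;
}

With the property.c hunks below applied, the same call behaves consistently whether the firmware description is DT or ACPI.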
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 2cdbd08b30e4..ca00a5dbcf75 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -461,7 +461,7 @@ config ACPI_HED config ACPI_BGRT bool "Boottime Graphics Resource Table support" - depends on EFI && (X86 || ARM64 || LOONGARCH) + depends on EFI help This driver adds support for exposing the ACPI Boottime Graphics Resource Table, which allows the operating system to obtain diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 4958301f5417..5a36d57289b4 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -74,7 +74,6 @@ struct memory_target { struct node_cache_attrs cache_attrs; u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE]; bool registered; - bool ext_updated; /* externally updated */ }; struct memory_initiator { @@ -368,35 +367,6 @@ static void hmat_update_target_access(struct memory_target *target, } } -int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, - enum access_coordinate_class access) -{ - struct memory_target *target; - int pxm; - - if (nid == NUMA_NO_NODE) - return -EINVAL; - - pxm = node_to_pxm(nid); - guard(mutex)(&target_lock); - target = find_mem_target(pxm); - if (!target) - return -ENODEV; - - hmat_update_target_access(target, ACPI_HMAT_READ_LATENCY, - coord->read_latency, access); - hmat_update_target_access(target, ACPI_HMAT_WRITE_LATENCY, - coord->write_latency, access); - hmat_update_target_access(target, ACPI_HMAT_READ_BANDWIDTH, - coord->read_bandwidth, access); - hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH, - coord->write_bandwidth, access); - target->ext_updated = true; - - return 0; -} -EXPORT_SYMBOL_GPL(hmat_update_target_coordinates); - static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc) { struct memory_locality *loc; @@ -773,10 +743,6 @@ static void hmat_update_target_attrs(struct memory_target *target, u32 best = 0; int i; - /* Don't update if an external agent has changed the data. */ - if (target->ext_updated) - return; - /* Don't update for generic port if there's no device handle */ if ((access == NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL || access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) && diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 54baa23a9e5a..b261dbb16230 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -844,13 +844,35 @@ acpi_fwnode_get_named_child_node(const struct fwnode_handle *fwnode, return NULL; } +static unsigned int acpi_fwnode_get_args_count(struct fwnode_handle *fwnode, + const char *nargs_prop) +{ + const struct acpi_device_data *data; + const union acpi_object *obj; + int ret; + + data = acpi_device_data_of_node(fwnode); + if (!data) + return 0; + + ret = acpi_data_get_property(data, nargs_prop, ACPI_TYPE_INTEGER, &obj); + if (ret) + return 0; + + return obj->integer.value; +} + static int acpi_get_ref_args(struct fwnode_reference_args *args, struct fwnode_handle *ref_fwnode, + const char *nargs_prop, const union acpi_object **element, const union acpi_object *end, size_t num_args) { u32 nargs = 0, i; + if (nargs_prop) + num_args = acpi_fwnode_get_args_count(ref_fwnode, nargs_prop); + /* * Assume the following integer elements are all args. 
Stop counting on * the first reference (possibly represented as a string) or end of the @@ -922,45 +944,10 @@ static struct fwnode_handle *acpi_parse_string_ref(const struct fwnode_handle *f return &dn->fwnode; } -/** - * __acpi_node_get_property_reference - returns handle to the referenced object - * @fwnode: Firmware node to get the property from - * @propname: Name of the property - * @index: Index of the reference to return - * @num_args: Maximum number of arguments after each reference - * @args: Location to store the returned reference with optional arguments - * (may be NULL) - * - * Find property with @name, verifify that it is a package containing at least - * one object reference and if so, store the ACPI device object pointer to the - * target object in @args->adev. If the reference includes arguments, store - * them in the @args->args[] array. - * - * If there's more than one reference in the property value package, @index is - * used to select the one to return. - * - * It is possible to leave holes in the property value set like in the - * example below: - * - * Package () { - * "cs-gpios", - * Package () { - * ^GPIO, 19, 0, 0, - * ^GPIO, 20, 0, 0, - * 0, - * ^GPIO, 21, 0, 0, - * } - * } - * - * Calling this function with index %2 or index %3 return %-ENOENT. If the - * property does not contain any more values %-ENOENT is returned. The NULL - * entry must be single integer and preferably contain value %0. - * - * Return: %0 on success, negative error code on failure. - */ -int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, - const char *propname, size_t index, size_t num_args, - struct fwnode_reference_args *args) +static int acpi_fwnode_get_reference_args(const struct fwnode_handle *fwnode, + const char *propname, const char *nargs_prop, + unsigned int args_count, unsigned int index, + struct fwnode_reference_args *args) { const union acpi_object *element, *end; const union acpi_object *obj; @@ -1036,10 +1023,10 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, return -EINVAL; element++; - ret = acpi_get_ref_args(idx == index ? args : NULL, acpi_fwnode_handle(device), - &element, end, num_args); + nargs_prop, &element, end, + args_count); if (ret < 0) return ret; @@ -1054,10 +1041,9 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, return -EINVAL; element++; - ret = acpi_get_ref_args(idx == index ? args : NULL, - ref_fwnode, &element, end, - num_args); + ref_fwnode, nargs_prop, &element, end, + args_count); if (ret < 0) return ret; @@ -1079,6 +1065,50 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, return -ENOENT; } + +/** + * __acpi_node_get_property_reference - returns handle to the referenced object + * @fwnode: Firmware node to get the property from + * @propname: Name of the property + * @index: Index of the reference to return + * @num_args: Maximum number of arguments after each reference + * @args: Location to store the returned reference with optional arguments + * (may be NULL) + * + * Find property with @name, verifify that it is a package containing at least + * one object reference and if so, store the ACPI device object pointer to the + * target object in @args->adev. If the reference includes arguments, store + * them in the @args->args[] array. + * + * If there's more than one reference in the property value package, @index is + * used to select the one to return. 
+ * + * It is possible to leave holes in the property value set like in the + * example below: + * + * Package () { + * "cs-gpios", + * Package () { + * ^GPIO, 19, 0, 0, + * ^GPIO, 20, 0, 0, + * 0, + * ^GPIO, 21, 0, 0, + * } + * } + * + * Calling this function with index %2 or index %3 return %-ENOENT. If the + * property does not contain any more values %-ENOENT is returned. The NULL + * entry must be single integer and preferably contain value %0. + * + * Return: %0 on success, negative error code on failure. + */ +int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, + const char *propname, size_t index, + size_t num_args, + struct fwnode_reference_args *args) +{ + return acpi_fwnode_get_reference_args(fwnode, propname, NULL, index, num_args, args); +} EXPORT_SYMBOL_GPL(__acpi_node_get_property_reference); static int acpi_data_prop_read_single(const struct acpi_device_data *data, @@ -1598,16 +1628,6 @@ acpi_fwnode_property_read_string_array(const struct fwnode_handle *fwnode, val, nval); } -static int -acpi_fwnode_get_reference_args(const struct fwnode_handle *fwnode, - const char *prop, const char *nargs_prop, - unsigned int args_count, unsigned int index, - struct fwnode_reference_args *args) -{ - return __acpi_node_get_property_reference(fwnode, prop, index, - args_count, args); -} - static const char *acpi_fwnode_get_name(const struct fwnode_handle *fwnode) { const struct acpi_device *adev; diff --git a/drivers/acpi/riscv/irq.c b/drivers/acpi/riscv/irq.c index cced960c2aef..d9a2154d6c6a 100644 --- a/drivers/acpi/riscv/irq.c +++ b/drivers/acpi/riscv/irq.c @@ -10,6 +10,8 @@ #include "init.h" +#define RISCV_ACPI_INTC_FLAG_PENDING BIT(0) + struct riscv_ext_intc_list { acpi_handle handle; u32 gsi_base; @@ -17,6 +19,7 @@ struct riscv_ext_intc_list { u32 nr_idcs; u32 id; u32 type; + u32 flag; struct list_head list; }; @@ -69,6 +72,22 @@ static acpi_status riscv_acpi_update_gsi_handle(u32 gsi_base, acpi_handle handle return AE_NOT_FOUND; } +int riscv_acpi_update_gsi_range(u32 gsi_base, u32 nr_irqs) +{ + struct riscv_ext_intc_list *ext_intc_element; + + list_for_each_entry(ext_intc_element, &ext_intc_list, list) { + if (gsi_base == ext_intc_element->gsi_base && + (ext_intc_element->flag & RISCV_ACPI_INTC_FLAG_PENDING)) { + ext_intc_element->nr_irqs = nr_irqs; + ext_intc_element->flag &= ~RISCV_ACPI_INTC_FLAG_PENDING; + return 0; + } + } + + return -ENODEV; +} + int riscv_acpi_get_gsi_info(struct fwnode_handle *fwnode, u32 *gsi_base, u32 *id, u32 *nr_irqs, u32 *nr_idcs) { @@ -115,20 +134,67 @@ struct fwnode_handle *riscv_acpi_get_gsi_domain_id(u32 gsi) static int __init riscv_acpi_register_ext_intc(u32 gsi_base, u32 nr_irqs, u32 nr_idcs, u32 id, u32 type) { - struct riscv_ext_intc_list *ext_intc_element; + struct riscv_ext_intc_list *ext_intc_element, *node, *prev; ext_intc_element = kzalloc(sizeof(*ext_intc_element), GFP_KERNEL); if (!ext_intc_element) return -ENOMEM; ext_intc_element->gsi_base = gsi_base; - ext_intc_element->nr_irqs = nr_irqs; + + /* If nr_irqs is zero, indicate it in flag and set to max range possible */ + if (nr_irqs) { + ext_intc_element->nr_irqs = nr_irqs; + } else { + ext_intc_element->flag |= RISCV_ACPI_INTC_FLAG_PENDING; + ext_intc_element->nr_irqs = U32_MAX - ext_intc_element->gsi_base; + } + ext_intc_element->nr_idcs = nr_idcs; ext_intc_element->id = id; - list_add_tail(&ext_intc_element->list, &ext_intc_list); + list_for_each_entry(node, &ext_intc_list, list) { + if (node->gsi_base < ext_intc_element->gsi_base) + break; + } + + /* Adjust the 
previous node's GSI range if that has pending registration */ + prev = list_prev_entry(node, list); + if (!list_entry_is_head(prev, &ext_intc_list, list)) { + if (prev->flag & RISCV_ACPI_INTC_FLAG_PENDING) + prev->nr_irqs = ext_intc_element->gsi_base - prev->gsi_base; + } + + list_add_tail(&ext_intc_element->list, &node->list); return 0; } +static acpi_status __init riscv_acpi_create_gsi_map_smsi(acpi_handle handle, u32 level, + void *context, void **return_value) +{ + acpi_status status; + u64 gbase; + + if (!acpi_has_method(handle, "_GSB")) { + acpi_handle_err(handle, "_GSB method not found\n"); + return AE_ERROR; + } + + status = acpi_evaluate_integer(handle, "_GSB", NULL, &gbase); + if (ACPI_FAILURE(status)) { + acpi_handle_err(handle, "failed to evaluate _GSB method\n"); + return status; + } + + riscv_acpi_register_ext_intc(gbase, 0, 0, 0, ACPI_RISCV_IRQCHIP_SMSI); + status = riscv_acpi_update_gsi_handle((u32)gbase, handle); + if (ACPI_FAILURE(status)) { + acpi_handle_err(handle, "failed to find the GSI mapping entry\n"); + return status; + } + + return AE_OK; +} + static acpi_status __init riscv_acpi_create_gsi_map(acpi_handle handle, u32 level, void *context, void **return_value) { @@ -183,6 +249,9 @@ void __init riscv_acpi_init_gsi_mapping(void) if (acpi_table_parse_madt(ACPI_MADT_TYPE_APLIC, riscv_acpi_aplic_parse_madt, 0) > 0) acpi_get_devices("RSCV0002", riscv_acpi_create_gsi_map, NULL, NULL); + + /* Unlike PLIC/APLIC, SYSMSI doesn't have MADT */ + acpi_get_devices("RSCV0006", riscv_acpi_create_gsi_map_smsi, NULL, NULL); } static acpi_handle riscv_acpi_get_gsi_handle(u32 gsi) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 065abe56f440..ef16d58b2949 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -861,6 +861,8 @@ static const char * const acpi_honor_dep_ids[] = { "INTC10CF", /* IVSC (MTL) driver must be loaded to allow i2c access to camera sensors */ "RSCV0001", /* RISC-V PLIC */ "RSCV0002", /* RISC-V APLIC */ + "RSCV0005", /* RISC-V SBI MPXY MBOX */ + "RSCV0006", /* RISC-V RPMI SYSMSI */ "PNP0C0F", /* PCI Link Device */ NULL }; diff --git a/drivers/base/node.c b/drivers/base/node.c index 67b01d579737..3e2329ccb618 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -249,6 +249,44 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, EXPORT_SYMBOL_GPL(node_set_perf_attrs); /** + * node_update_perf_attrs - Update the performance values for given access class + * @nid: Node identifier to be updated + * @coord: Heterogeneous memory performance coordinates + * @access: The access class for the given attributes + */ +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access) +{ + struct node_access_nodes *access_node; + struct node *node; + int i; + + if (WARN_ON_ONCE(!node_online(nid))) + return; + + node = node_devices[nid]; + list_for_each_entry(access_node, &node->access_list, list_node) { + if (access_node->access != access) + continue; + + access_node->coord = *coord; + for (i = 0; access_attrs[i]; i++) { + sysfs_notify(&access_node->dev.kobj, + NULL, access_attrs[i]->name); + } + break; + } + + /* When setting CPU access coordinates, update mempolicy */ + if (access != ACCESS_COORDINATE_CPU) + return; + + if (mempolicy_set_node_perf(nid, coord)) + pr_info("failed to set mempolicy attrs for node %d\n", nid); +} +EXPORT_SYMBOL_GPL(node_update_perf_attrs); + +/** * struct node_cache_info - Internal tracking for memory node caches * @dev: Device represeting the 
cache level * @node: List element for tracking in the node diff --git a/drivers/base/property.c b/drivers/base/property.c index f626d5bbe806..6a63860579dd 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -578,7 +578,7 @@ EXPORT_SYMBOL_GPL(fwnode_property_match_property_string); * @prop: The name of the property * @nargs_prop: The name of the property telling the number of * arguments in the referred node. NULL if @nargs is known, - * otherwise @nargs is ignored. Only relevant on OF. + * otherwise @nargs is ignored. * @nargs: Number of arguments. Ignored if @nargs_prop is non-NULL. * @index: Index of the reference, from zero onwards. * @args: Result structure with reference and integer arguments. diff --git a/drivers/cdx/Kconfig b/drivers/cdx/Kconfig index 3af41f51cf38..1f1e360507d7 100644 --- a/drivers/cdx/Kconfig +++ b/drivers/cdx/Kconfig @@ -8,7 +8,6 @@ config CDX_BUS bool "CDX Bus driver" depends on OF && ARM64 || COMPILE_TEST - select GENERIC_MSI_IRQ help Driver to enable Composable DMA Transfer(CDX) Bus. CDX bus exposes Fabric devices which uses composable DMA IP to the diff --git a/drivers/cdx/cdx.c b/drivers/cdx/cdx.c index 092306ca2541..3d50f8cd9c0b 100644 --- a/drivers/cdx/cdx.c +++ b/drivers/cdx/cdx.c @@ -310,7 +310,7 @@ static int cdx_probe(struct device *dev) * Setup MSI device data so that generic MSI alloc/free can * be used by the device driver. */ - if (cdx->msi_domain) { + if (IS_ENABLED(CONFIG_GENERIC_MSI_IRQ) && cdx->msi_domain) { error = msi_setup_device_data(&cdx_dev->dev); if (error) return error; @@ -833,7 +833,7 @@ int cdx_device_add(struct cdx_dev_params *dev_params) ((cdx->id << CDX_CONTROLLER_ID_SHIFT) | (cdx_dev->bus_num & CDX_BUS_NUM_MASK)), cdx_dev->dev_num); - if (cdx->msi_domain) { + if (IS_ENABLED(CONFIG_GENERIC_MSI_IRQ) && cdx->msi_domain) { cdx_dev->num_msi = dev_params->num_msi; dev_set_msi_domain(&cdx_dev->dev, cdx->msi_domain); } diff --git a/drivers/cdx/controller/Kconfig b/drivers/cdx/controller/Kconfig index 0641a4c21e66..a480b62cbd1f 100644 --- a/drivers/cdx/controller/Kconfig +++ b/drivers/cdx/controller/Kconfig @@ -10,7 +10,6 @@ if CDX_BUS config CDX_CONTROLLER tristate "CDX bus controller" depends on HAS_DMA - select GENERIC_MSI_IRQ select REMOTEPROC select RPMSG help diff --git a/drivers/cdx/controller/cdx_controller.c b/drivers/cdx/controller/cdx_controller.c index 3f8b9041babf..280f207735da 100644 --- a/drivers/cdx/controller/cdx_controller.c +++ b/drivers/cdx/controller/cdx_controller.c @@ -193,7 +193,8 @@ static int xlnx_cdx_probe(struct platform_device *pdev) cdx->ops = &cdx_ops; /* Create MSI domain */ - cdx->msi_domain = cdx_msi_domain_init(&pdev->dev); + if (IS_ENABLED(CONFIG_GENERIC_MSI_IRQ)) + cdx->msi_domain = cdx_msi_domain_init(&pdev->dev); if (!cdx->msi_domain) { ret = dev_err_probe(&pdev->dev, -ENODEV, "cdx_msi_domain_init() failed"); goto cdx_msi_fail; diff --git a/drivers/char/hw_random/n2rng.h b/drivers/char/hw_random/n2rng.h index 9a870f5dc371..7612f15a261f 100644 --- a/drivers/char/hw_random/n2rng.h +++ b/drivers/char/hw_random/n2rng.h @@ -48,7 +48,7 @@ #define HV_RNG_NUM_CONTROL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned long sun4v_rng_get_diag_ctl(void); extern unsigned long sun4v_rng_ctl_read_v1(unsigned long ctl_regs_ra, unsigned long *state, @@ -147,6 +147,6 @@ struct n2rng { #define N2RNG_BUSY_LIMIT 100 #define N2RNG_HCHECK_LIMIT 100 -#endif /* !(__ASSEMBLY__) */ +#endif /* !(__ASSEMBLER__) */ #endif /* _N2RNG_H */ diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 
4d56475f94fc..6fc5ae76b483 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -501,6 +501,15 @@ config COMMON_CLK_SP7021 Not all features of the PLL are currently supported by the driver. +config COMMON_CLK_RPMI + tristate "Clock driver based on RISC-V RPMI" + depends on RISCV || COMPILE_TEST + depends on MAILBOX + default RISCV + help + Support for clocks based on the clock service group defined by + the RISC-V platform management interface (RPMI) specification. + source "drivers/clk/actions/Kconfig" source "drivers/clk/analogbits/Kconfig" source "drivers/clk/baikal-t1/Kconfig" diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 18ed29cfdc11..b74a1767ca27 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_COMMON_CLK_PWM) += clk-pwm.o obj-$(CONFIG_CLK_QORIQ) += clk-qoriq.o obj-$(CONFIG_COMMON_CLK_RK808) += clk-rk808.o obj-$(CONFIG_COMMON_CLK_RP1) += clk-rp1.o +obj-$(CONFIG_COMMON_CLK_RPMI) += clk-rpmi.o obj-$(CONFIG_COMMON_CLK_HI655X) += clk-hi655x.o obj-$(CONFIG_COMMON_CLK_S2MPS11) += clk-s2mps11.o obj-$(CONFIG_COMMON_CLK_SCMI) += clk-scmi.o diff --git a/drivers/clk/clk-rpmi.c b/drivers/clk/clk-rpmi.c new file mode 100644 index 000000000000..921296aafa68 --- /dev/null +++ b/drivers/clk/clk-rpmi.c @@ -0,0 +1,620 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V MPXY Based Clock Driver + * + * Copyright (C) 2025 Ventana Micro Systems Ltd. + */ + +#include <linux/clk-provider.h> +#include <linux/err.h> +#include <linux/mailbox_client.h> +#include <linux/mailbox/riscv-rpmi-message.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/wordpart.h> + +#define RPMI_CLK_DISCRETE_MAX_NUM_RATES 16 +#define RPMI_CLK_NAME_LEN 16 + +#define to_rpmi_clk(clk) container_of(clk, struct rpmi_clk, hw) + +enum rpmi_clk_config { + RPMI_CLK_DISABLE = 0, + RPMI_CLK_ENABLE = 1, + RPMI_CLK_CONFIG_MAX_IDX +}; + +#define RPMI_CLK_TYPE_MASK GENMASK(1, 0) +enum rpmi_clk_type { + RPMI_CLK_DISCRETE = 0, + RPMI_CLK_LINEAR = 1, + RPMI_CLK_TYPE_MAX_IDX +}; + +struct rpmi_clk_context { + struct device *dev; + struct mbox_chan *chan; + struct mbox_client client; + u32 max_msg_data_size; +}; + +/* + * rpmi_clk_rates represents the rates format + * as specified by the RPMI specification. + * No other data format (e.g., struct linear_range) + * is required to avoid to and from conversion. 
+ */ +union rpmi_clk_rates { + u64 discrete[RPMI_CLK_DISCRETE_MAX_NUM_RATES]; + struct { + u64 min; + u64 max; + u64 step; + } linear; +}; + +struct rpmi_clk { + struct rpmi_clk_context *context; + u32 id; + u32 num_rates; + u32 transition_latency; + enum rpmi_clk_type type; + union rpmi_clk_rates *rates; + char name[RPMI_CLK_NAME_LEN]; + struct clk_hw hw; +}; + +struct rpmi_clk_rate_discrete { + __le32 lo; + __le32 hi; +}; + +struct rpmi_clk_rate_linear { + __le32 min_lo; + __le32 min_hi; + __le32 max_lo; + __le32 max_hi; + __le32 step_lo; + __le32 step_hi; +}; + +struct rpmi_get_num_clocks_rx { + __le32 status; + __le32 num_clocks; +}; + +struct rpmi_get_attrs_tx { + __le32 clkid; +}; + +struct rpmi_get_attrs_rx { + __le32 status; + __le32 flags; + __le32 num_rates; + __le32 transition_latency; + char name[RPMI_CLK_NAME_LEN]; +}; + +struct rpmi_get_supp_rates_tx { + __le32 clkid; + __le32 clk_rate_idx; +}; + +struct rpmi_get_supp_rates_rx { + __le32 status; + __le32 flags; + __le32 remaining; + __le32 returned; + __le32 rates[]; +}; + +struct rpmi_get_rate_tx { + __le32 clkid; +}; + +struct rpmi_get_rate_rx { + __le32 status; + __le32 lo; + __le32 hi; +}; + +struct rpmi_set_rate_tx { + __le32 clkid; + __le32 flags; + __le32 lo; + __le32 hi; +}; + +struct rpmi_set_rate_rx { + __le32 status; +}; + +struct rpmi_set_config_tx { + __le32 clkid; + __le32 config; +}; + +struct rpmi_set_config_rx { + __le32 status; +}; + +static inline u64 rpmi_clkrate_u64(u32 __hi, u32 __lo) +{ + return (((u64)(__hi) << 32) | (u32)(__lo)); +} + +static u32 rpmi_clk_get_num_clocks(struct rpmi_clk_context *context) +{ + struct rpmi_get_num_clocks_rx rx, *resp; + struct rpmi_mbox_message msg; + int ret; + + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_GET_NUM_CLOCKS, + NULL, 0, &rx, sizeof(rx)); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return 0; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp || resp->status) + return 0; + + return le32_to_cpu(resp->num_clocks); +} + +static int rpmi_clk_get_attrs(u32 clkid, struct rpmi_clk *rpmi_clk) +{ + struct rpmi_clk_context *context = rpmi_clk->context; + struct rpmi_mbox_message msg; + struct rpmi_get_attrs_tx tx; + struct rpmi_get_attrs_rx rx, *resp; + u8 format; + int ret; + + tx.clkid = cpu_to_le32(clkid); + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_GET_ATTRIBUTES, + &tx, sizeof(tx), &rx, sizeof(rx)); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return ret; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp) + return -EINVAL; + if (resp->status) + return rpmi_to_linux_error(le32_to_cpu(resp->status)); + + rpmi_clk->id = clkid; + rpmi_clk->num_rates = le32_to_cpu(resp->num_rates); + rpmi_clk->transition_latency = le32_to_cpu(resp->transition_latency); + strscpy(rpmi_clk->name, resp->name, RPMI_CLK_NAME_LEN); + + format = le32_to_cpu(resp->flags) & RPMI_CLK_TYPE_MASK; + if (format >= RPMI_CLK_TYPE_MAX_IDX) + return -EINVAL; + + rpmi_clk->type = format; + + return 0; +} + +static int rpmi_clk_get_supported_rates(u32 clkid, struct rpmi_clk *rpmi_clk) +{ + struct rpmi_clk_context *context = rpmi_clk->context; + struct rpmi_clk_rate_discrete *rate_discrete; + struct rpmi_clk_rate_linear *rate_linear; + struct rpmi_get_supp_rates_tx tx; + struct rpmi_get_supp_rates_rx *resp; + struct rpmi_mbox_message msg; + size_t clk_rate_idx; + int ret, rateidx, j; + + tx.clkid = cpu_to_le32(clkid); + tx.clk_rate_idx = 0; + + /* + * Make sure we allocate rx buffer sufficient to be accommodate all + * the 
rates sent in one RPMI message. + */ + struct rpmi_get_supp_rates_rx *rx __free(kfree) = + kzalloc(context->max_msg_data_size, GFP_KERNEL); + if (!rx) + return -ENOMEM; + + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_GET_SUPPORTED_RATES, + &tx, sizeof(tx), rx, context->max_msg_data_size); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return ret; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp) + return -EINVAL; + if (resp->status) + return rpmi_to_linux_error(le32_to_cpu(resp->status)); + if (!le32_to_cpu(resp->returned)) + return -EINVAL; + + if (rpmi_clk->type == RPMI_CLK_DISCRETE) { + rate_discrete = (struct rpmi_clk_rate_discrete *)resp->rates; + + for (rateidx = 0; rateidx < le32_to_cpu(resp->returned); rateidx++) { + rpmi_clk->rates->discrete[rateidx] = + rpmi_clkrate_u64(le32_to_cpu(rate_discrete[rateidx].hi), + le32_to_cpu(rate_discrete[rateidx].lo)); + } + + /* + * Keep sending the request message until all + * the rates are received. + */ + clk_rate_idx = 0; + while (le32_to_cpu(resp->remaining)) { + clk_rate_idx += le32_to_cpu(resp->returned); + tx.clk_rate_idx = cpu_to_le32(clk_rate_idx); + + rpmi_mbox_init_send_with_response(&msg, + RPMI_CLK_SRV_GET_SUPPORTED_RATES, + &tx, sizeof(tx), + rx, context->max_msg_data_size); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return ret; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp) + return -EINVAL; + if (resp->status) + return rpmi_to_linux_error(le32_to_cpu(resp->status)); + if (!le32_to_cpu(resp->returned)) + return -EINVAL; + + for (j = 0; j < le32_to_cpu(resp->returned); j++) { + if (rateidx >= clk_rate_idx + le32_to_cpu(resp->returned)) + break; + rpmi_clk->rates->discrete[rateidx++] = + rpmi_clkrate_u64(le32_to_cpu(rate_discrete[j].hi), + le32_to_cpu(rate_discrete[j].lo)); + } + } + } else if (rpmi_clk->type == RPMI_CLK_LINEAR) { + rate_linear = (struct rpmi_clk_rate_linear *)resp->rates; + + rpmi_clk->rates->linear.min = rpmi_clkrate_u64(le32_to_cpu(rate_linear->min_hi), + le32_to_cpu(rate_linear->min_lo)); + rpmi_clk->rates->linear.max = rpmi_clkrate_u64(le32_to_cpu(rate_linear->max_hi), + le32_to_cpu(rate_linear->max_lo)); + rpmi_clk->rates->linear.step = rpmi_clkrate_u64(le32_to_cpu(rate_linear->step_hi), + le32_to_cpu(rate_linear->step_lo)); + } + + return 0; +} + +static unsigned long rpmi_clk_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct rpmi_clk *rpmi_clk = to_rpmi_clk(hw); + struct rpmi_clk_context *context = rpmi_clk->context; + struct rpmi_mbox_message msg; + struct rpmi_get_rate_tx tx; + struct rpmi_get_rate_rx rx, *resp; + int ret; + + tx.clkid = cpu_to_le32(rpmi_clk->id); + + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_GET_RATE, + &tx, sizeof(tx), &rx, sizeof(rx)); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return ret; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp) + return -EINVAL; + if (resp->status) + return rpmi_to_linux_error(le32_to_cpu(resp->status)); + + return rpmi_clkrate_u64(le32_to_cpu(resp->hi), le32_to_cpu(resp->lo)); +} + +static int rpmi_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + struct rpmi_clk *rpmi_clk = to_rpmi_clk(hw); + u64 fmin, fmax, ftmp; + + /* + * Keep the requested rate if the clock format + * is of discrete type. Let the platform which + * is actually controlling the clock handle that. 
+ */ + if (rpmi_clk->type == RPMI_CLK_DISCRETE) + return 0; + + fmin = rpmi_clk->rates->linear.min; + fmax = rpmi_clk->rates->linear.max; + + if (req->rate <= fmin) { + req->rate = fmin; + return 0; + } else if (req->rate >= fmax) { + req->rate = fmax; + return 0; + } + + ftmp = req->rate - fmin; + ftmp += rpmi_clk->rates->linear.step - 1; + do_div(ftmp, rpmi_clk->rates->linear.step); + + req->rate = ftmp * rpmi_clk->rates->linear.step + fmin; + + return 0; +} + +static int rpmi_clk_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct rpmi_clk *rpmi_clk = to_rpmi_clk(hw); + struct rpmi_clk_context *context = rpmi_clk->context; + struct rpmi_mbox_message msg; + struct rpmi_set_rate_tx tx; + struct rpmi_set_rate_rx rx, *resp; + int ret; + + tx.clkid = cpu_to_le32(rpmi_clk->id); + tx.lo = cpu_to_le32(lower_32_bits(rate)); + tx.hi = cpu_to_le32(upper_32_bits(rate)); + + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_SET_RATE, + &tx, sizeof(tx), &rx, sizeof(rx)); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return ret; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp) + return -EINVAL; + if (resp->status) + return rpmi_to_linux_error(le32_to_cpu(resp->status)); + + return 0; +} + +static int rpmi_clk_enable(struct clk_hw *hw) +{ + struct rpmi_clk *rpmi_clk = to_rpmi_clk(hw); + struct rpmi_clk_context *context = rpmi_clk->context; + struct rpmi_mbox_message msg; + struct rpmi_set_config_tx tx; + struct rpmi_set_config_rx rx, *resp; + int ret; + + tx.config = cpu_to_le32(RPMI_CLK_ENABLE); + tx.clkid = cpu_to_le32(rpmi_clk->id); + + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_SET_CONFIG, + &tx, sizeof(tx), &rx, sizeof(rx)); + + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return ret; + + resp = rpmi_mbox_get_msg_response(&msg); + if (!resp) + return -EINVAL; + if (resp->status) + return rpmi_to_linux_error(le32_to_cpu(resp->status)); + + return 0; +} + +static void rpmi_clk_disable(struct clk_hw *hw) +{ + struct rpmi_clk *rpmi_clk = to_rpmi_clk(hw); + struct rpmi_clk_context *context = rpmi_clk->context; + struct rpmi_mbox_message msg; + struct rpmi_set_config_tx tx; + struct rpmi_set_config_rx rx; + + tx.config = cpu_to_le32(RPMI_CLK_DISABLE); + tx.clkid = cpu_to_le32(rpmi_clk->id); + + rpmi_mbox_init_send_with_response(&msg, RPMI_CLK_SRV_SET_CONFIG, + &tx, sizeof(tx), &rx, sizeof(rx)); + + rpmi_mbox_send_message(context->chan, &msg); +} + +static const struct clk_ops rpmi_clk_ops = { + .recalc_rate = rpmi_clk_recalc_rate, + .determine_rate = rpmi_clk_determine_rate, + .set_rate = rpmi_clk_set_rate, + .prepare = rpmi_clk_enable, + .unprepare = rpmi_clk_disable, +}; + +static struct clk_hw *rpmi_clk_enumerate(struct rpmi_clk_context *context, u32 clkid) +{ + struct device *dev = context->dev; + unsigned long min_rate, max_rate; + union rpmi_clk_rates *rates; + struct rpmi_clk *rpmi_clk; + struct clk_init_data init = {}; + struct clk_hw *clk_hw; + int ret; + + rates = devm_kzalloc(dev, sizeof(*rates), GFP_KERNEL); + if (!rates) + return ERR_PTR(-ENOMEM); + + rpmi_clk = devm_kzalloc(dev, sizeof(*rpmi_clk), GFP_KERNEL); + if (!rpmi_clk) + return ERR_PTR(-ENOMEM); + + rpmi_clk->context = context; + rpmi_clk->rates = rates; + + ret = rpmi_clk_get_attrs(clkid, rpmi_clk); + if (ret) + return dev_err_ptr_probe(dev, ret, + "Failed to get clk-%u attributes\n", + clkid); + + ret = rpmi_clk_get_supported_rates(clkid, rpmi_clk); + if (ret) + return dev_err_ptr_probe(dev, ret, + "Get supported rates failed for 
clk-%u\n", + clkid); + + init.flags = CLK_GET_RATE_NOCACHE; + init.num_parents = 0; + init.ops = &rpmi_clk_ops; + init.name = rpmi_clk->name; + clk_hw = &rpmi_clk->hw; + clk_hw->init = &init; + + ret = devm_clk_hw_register(dev, clk_hw); + if (ret) + return dev_err_ptr_probe(dev, ret, + "Unable to register clk-%u\n", + clkid); + + if (rpmi_clk->type == RPMI_CLK_DISCRETE) { + min_rate = rpmi_clk->rates->discrete[0]; + max_rate = rpmi_clk->rates->discrete[rpmi_clk->num_rates - 1]; + } else { + min_rate = rpmi_clk->rates->linear.min; + max_rate = rpmi_clk->rates->linear.max; + } + + clk_hw_set_rate_range(clk_hw, min_rate, max_rate); + + return clk_hw; +} + +static void rpmi_clk_mbox_chan_release(void *data) +{ + struct mbox_chan *chan = data; + + mbox_free_channel(chan); +} + +static int rpmi_clk_probe(struct platform_device *pdev) +{ + int ret; + unsigned int num_clocks, i; + struct clk_hw_onecell_data *clk_data; + struct rpmi_clk_context *context; + struct rpmi_mbox_message msg; + struct clk_hw *hw_ptr; + struct device *dev = &pdev->dev; + + context = devm_kzalloc(dev, sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + context->dev = dev; + platform_set_drvdata(pdev, context); + + context->client.dev = context->dev; + context->client.rx_callback = NULL; + context->client.tx_block = false; + context->client.knows_txdone = true; + context->client.tx_tout = 0; + + context->chan = mbox_request_channel(&context->client, 0); + if (IS_ERR(context->chan)) + return PTR_ERR(context->chan); + + ret = devm_add_action_or_reset(dev, rpmi_clk_mbox_chan_release, context->chan); + if (ret) + return dev_err_probe(dev, ret, "Failed to add rpmi mbox channel cleanup\n"); + + rpmi_mbox_init_get_attribute(&msg, RPMI_MBOX_ATTR_SPEC_VERSION); + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return dev_err_probe(dev, ret, "Failed to get spec version\n"); + if (msg.attr.value < RPMI_MKVER(1, 0)) { + return dev_err_probe(dev, -EINVAL, + "msg protocol version mismatch, expected 0x%x, found 0x%x\n", + RPMI_MKVER(1, 0), msg.attr.value); + } + + rpmi_mbox_init_get_attribute(&msg, RPMI_MBOX_ATTR_SERVICEGROUP_ID); + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return dev_err_probe(dev, ret, "Failed to get service group ID\n"); + if (msg.attr.value != RPMI_SRVGRP_CLOCK) { + return dev_err_probe(dev, -EINVAL, + "service group match failed, expected 0x%x, found 0x%x\n", + RPMI_SRVGRP_CLOCK, msg.attr.value); + } + + rpmi_mbox_init_get_attribute(&msg, RPMI_MBOX_ATTR_SERVICEGROUP_VERSION); + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return dev_err_probe(dev, ret, "Failed to get service group version\n"); + if (msg.attr.value < RPMI_MKVER(1, 0)) { + return dev_err_probe(dev, -EINVAL, + "service group version failed, expected 0x%x, found 0x%x\n", + RPMI_MKVER(1, 0), msg.attr.value); + } + + rpmi_mbox_init_get_attribute(&msg, RPMI_MBOX_ATTR_MAX_MSG_DATA_SIZE); + ret = rpmi_mbox_send_message(context->chan, &msg); + if (ret) + return dev_err_probe(dev, ret, "Failed to get max message data size\n"); + + context->max_msg_data_size = msg.attr.value; + num_clocks = rpmi_clk_get_num_clocks(context); + if (!num_clocks) + return dev_err_probe(dev, -ENODEV, "No clocks found\n"); + + clk_data = devm_kzalloc(dev, struct_size(clk_data, hws, num_clocks), + GFP_KERNEL); + if (!clk_data) + return dev_err_probe(dev, -ENOMEM, "No memory for clock data\n"); + clk_data->num = num_clocks; + + for (i = 0; i < clk_data->num; i++) { + hw_ptr = rpmi_clk_enumerate(context, i); + if 
(IS_ERR(hw_ptr)) { + return dev_err_probe(dev, PTR_ERR(hw_ptr), + "Failed to register clk-%d\n", i); + } + clk_data->hws[i] = hw_ptr; + } + + ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, clk_data); + if (ret) + return dev_err_probe(dev, ret, "Failed to register clock HW provider\n"); + + return 0; +} + +static const struct of_device_id rpmi_clk_of_match[] = { + { .compatible = "riscv,rpmi-clock" }, + { } +}; +MODULE_DEVICE_TABLE(of, rpmi_clk_of_match); + +static struct platform_driver rpmi_clk_driver = { + .driver = { + .name = "riscv-rpmi-clock", + .of_match_table = rpmi_clk_of_match, + }, + .probe = rpmi_clk_probe, +}; +module_platform_driver(rpmi_clk_driver); + +MODULE_AUTHOR("Rahul Pathak <rpathak@ventanamicro.com>"); +MODULE_DESCRIPTION("Clock Driver based on RPMI message protocol"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 712624cba2b6..d7a5539d07d4 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -20,8 +20,7 @@ static const guid_t acpi_cxl_qtg_id_guid = GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); - -static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) +static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr) { struct cxl_cxims_data *cximsd = cxlrd->platform_data; int hbiw = cxlrd->cxlsd.nr_targets; @@ -30,19 +29,23 @@ static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) /* No xormaps for host bridge interleave ways of 1 or 3 */ if (hbiw == 1 || hbiw == 3) - return hpa; + return addr; /* - * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) restore - * the position bit to its value before the xormap was applied at - * HPA->DPA translation. + * In regions using XOR interleave arithmetic the CXL HPA may not + * be the same as the SPA. This helper performs the SPA->CXL HPA + * or the CXL HPA->SPA translation. Since XOR is self-inverting, + * so is this function. + * + * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) applying the + * xormaps will toggle a position bit. * * pos is the lowest set bit in an XORMAP - * val is the XORALLBITS(HPA & XORMAP) + * val is the XORALLBITS(addr & XORMAP) * * XORALLBITS: The CXL spec (3.1 Table 9-22) defines XORALLBITS * as an operation that outputs a single bit by XORing all the - * bits in the input (hpa & xormap). Implement XORALLBITS using + * bits in the input (addr & xormap). Implement XORALLBITS using * hweight64(). If the hamming weight is even the XOR of those * bits results in val==0, if odd the XOR result is val==1. 
*/ @@ -51,11 +54,11 @@ static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) if (!cximsd->xormaps[i]) continue; pos = __ffs(cximsd->xormaps[i]); - val = (hweight64(hpa & cximsd->xormaps[i]) & 1); - hpa = (hpa & ~(1ULL << pos)) | (val << pos); + val = (hweight64(addr & cximsd->xormaps[i]) & 1); + addr = (addr & ~(1ULL << pos)) | (val << pos); } - return hpa; + return addr; } struct cxl_cxims_context { @@ -113,9 +116,9 @@ static unsigned long cfmws_to_decoder_flags(int restrictions) { unsigned long flags = CXL_DECODER_F_ENABLE; - if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE2) + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_DEVMEM) flags |= CXL_DECODER_F_TYPE2; - if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE3) + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM) flags |= CXL_DECODER_F_TYPE3; if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_VOLATILE) flags |= CXL_DECODER_F_RAM; @@ -398,7 +401,6 @@ DEFINE_FREE(del_cxl_resource, struct resource *, if (_T) del_cxl_resource(_T)) static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, struct cxl_cfmws_context *ctx) { - int target_map[CXL_DECODER_MAX_INTERLEAVE]; struct cxl_port *root_port = ctx->root_port; struct cxl_cxims_context cxims_ctx; struct device *dev = ctx->dev; @@ -416,8 +418,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, rc = eig_to_granularity(cfmws->granularity, &ig); if (rc) return rc; - for (i = 0; i < ways; i++) - target_map[i] = cfmws->interleave_targets[i]; struct resource *res __free(del_cxl_resource) = alloc_cxl_resource( cfmws->base_hpa, cfmws->window_size, ctx->id++); @@ -443,6 +443,8 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, .end = cfmws->base_hpa + cfmws->window_size - 1, }; cxld->interleave_ways = ways; + for (i = 0; i < ways; i++) + cxld->target_map[i] = cfmws->interleave_targets[i]; /* * Minimize the x1 granularity to advertise support for any * valid region granularity @@ -472,10 +474,16 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, cxlrd->qos_class = cfmws->qtg_id; - if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) - cxlrd->hpa_to_spa = cxl_xor_hpa_to_spa; + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { + cxlrd->ops = kzalloc(sizeof(*cxlrd->ops), GFP_KERNEL); + if (!cxlrd->ops) + return -ENOMEM; + + cxlrd->ops->hpa_to_spa = cxl_apply_xor_maps; + cxlrd->ops->spa_to_hpa = cxl_apply_xor_maps; + } - rc = cxl_decoder_add(cxld, target_map); + rc = cxl_decoder_add(cxld); if (rc) return rc; diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index c0af645425f4..c4bd6e8a0cf0 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -338,7 +338,7 @@ static int match_cxlrd_hb(struct device *dev, void *data) guard(rwsem_read)(&cxl_rwsem.region); for (int i = 0; i < cxlsd->nr_targets; i++) { - if (host_bridge == cxlsd->target[i]->dport_dev) + if (cxlsd->target[i] && host_bridge == cxlsd->target[i]->dport_dev) return 1; } @@ -440,8 +440,8 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, } *tbl = (struct acpi_cdat_sslbis_table *)header; int size = sizeof(header->cdat) + sizeof(tbl->sslbis_header); struct acpi_cdat_sslbis *sslbis; - struct cxl_port *port = arg; - struct device *dev = &port->dev; + struct cxl_dport *dport = arg; + struct device *dev = &dport->port->dev; int remain, entries, i; u16 len; @@ -467,8 +467,6 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, u16 y = le16_to_cpu((__force 
__le16)tbl->entries[i].porty_id); __le64 le_base; __le16 le_val; - struct cxl_dport *dport; - unsigned long index; u16 dsp_id; u64 val; @@ -499,28 +497,27 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base), sslbis->data_type); - xa_for_each(&port->dports, index, dport) { - if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT || - dsp_id == dport->port_id) { - cxl_access_coordinate_set(dport->coord, - sslbis->data_type, - val); - } + if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT || + dsp_id == dport->port_id) { + cxl_access_coordinate_set(dport->coord, + sslbis->data_type, val); + return 0; } } return 0; } -void cxl_switch_parse_cdat(struct cxl_port *port) +void cxl_switch_parse_cdat(struct cxl_dport *dport) { + struct cxl_port *port = dport->port; int rc; if (!port->cdat.table) return; rc = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler, - port, port->cdat.table, port->cdat.length); + dport, port->cdat.table, port->cdat.length); rc = cdat_table_parse_output(rc); if (rc) dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc); @@ -1075,14 +1072,3 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr, cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth; } } - -int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, - enum access_coordinate_class access) -{ - return hmat_update_target_coordinates(nid, &cxlr->coord[access], access); -} - -bool cxl_need_node_perf_attrs_update(int nid) -{ - return !acpi_node_backed_by_real_pxm(nid); -} diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 2669f251d677..1fb66132b777 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -135,11 +135,12 @@ enum cxl_poison_trace_type { CXL_POISON_TRACE_CLEAR, }; +enum poison_cmd_enabled_bits; +bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd, + enum poison_cmd_enabled_bits cmd); + long cxl_pci_get_latency(struct pci_dev *pdev); int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); -int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, - enum access_coordinate_class access); -bool cxl_need_node_perf_attrs_update(int nid); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); @@ -147,6 +148,11 @@ int cxl_ras_init(void); void cxl_ras_exit(void); int cxl_gpf_port_setup(struct cxl_dport *dport); +struct cxl_hdm; +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info); +int cxl_port_get_possible_dports(struct cxl_port *port); + #ifdef CONFIG_CXL_FEATURES struct cxl_feat_entry * cxl_feature_info(struct cxl_features_state *cxlfs, const uuid_t *uuid); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index e9e1d555cec6..d3a094ca01ad 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -21,12 +21,11 @@ struct cxl_rwsem cxl_rwsem = { .dpa = __RWSEM_INITIALIZER(cxl_rwsem.dpa), }; -static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, - int *target_map) +static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld) { int rc; - rc = cxl_decoder_add_locked(cxld, target_map); + rc = cxl_decoder_add_locked(cxld); if (rc) { put_device(&cxld->dev); dev_err(&port->dev, "Failed to add decoder\n"); @@ -50,12 +49,9 @@ static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, * are claimed and passed to the single dport. 
Disable the range until the first * CXL region is enumerated / activated. */ -int devm_cxl_add_passthrough_decoder(struct cxl_port *port) +static int devm_cxl_add_passthrough_decoder(struct cxl_port *port) { struct cxl_switch_decoder *cxlsd; - struct cxl_dport *dport = NULL; - int single_port_map[1]; - unsigned long index; struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); /* @@ -71,13 +67,8 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port) device_lock_assert(&port->dev); - xa_for_each(&port->dports, index, dport) - break; - single_port_map[0] = dport->port_id; - - return add_hdm_decoder(port, &cxlsd->cxld, single_port_map); + return add_hdm_decoder(port, &cxlsd->cxld); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, "CXL"); static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) { @@ -147,8 +138,8 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) * @port: cxl_port to map * @info: cached DVSEC range register info */ -struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info) +static struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, + struct cxl_endpoint_dvsec_info *info) { struct cxl_register_map *reg_map = &port->reg_map; struct device *dev = &port->dev; @@ -197,13 +188,12 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, */ if (should_emulate_decoders(info)) { dev_dbg(dev, "Fallback map %d range register%s\n", info->ranges, - info->ranges > 1 ? "s" : ""); + str_plural(info->ranges)); cxlhdm->decoder_count = info->ranges; } return cxlhdm; } -EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, "CXL"); static void __cxl_dpa_debug(struct seq_file *file, struct resource *r, int depth) { @@ -984,7 +974,7 @@ static int cxl_setup_hdm_decoder_from_dvsec( } static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, - int *target_map, void __iomem *hdm, int which, + void __iomem *hdm, int which, u64 *dpa_base, struct cxl_endpoint_dvsec_info *info) { struct cxl_endpoint_decoder *cxled = NULL; @@ -1103,7 +1093,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which)); target_list.value = (hi << 32) + lo; for (i = 0; i < cxld->interleave_ways; i++) - target_map[i] = target_list.target_id[i]; + cxld->target_map[i] = target_list.target_id[i]; return 0; } @@ -1168,8 +1158,8 @@ static void cxl_settle_decoders(struct cxl_hdm *cxlhdm) * @cxlhdm: Structure to populate with HDM capabilities * @info: cached DVSEC range register info */ -int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) +static int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info) { void __iomem *hdm = cxlhdm->regs.hdm_decoder; struct cxl_port *port = cxlhdm->port; @@ -1179,7 +1169,6 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, cxl_settle_decoders(cxlhdm); for (i = 0; i < cxlhdm->decoder_count; i++) { - int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; int rc, target_count = cxlhdm->target_count; struct cxl_decoder *cxld; @@ -1207,8 +1196,7 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, cxld = &cxlsd->cxld; } - rc = init_hdm_decoder(port, cxld, target_map, hdm, i, - &dpa_base, info); + rc = init_hdm_decoder(port, cxld, hdm, i, &dpa_base, info); if (rc) { dev_warn(&port->dev, "Failed to initialize decoder%d.%d\n", @@ -1216,7 +1204,7 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, put_device(&cxld->dev); return rc; } - rc = 
add_hdm_decoder(port, cxld, target_map); + rc = add_hdm_decoder(port, cxld); if (rc) { dev_warn(&port->dev, "Failed to add decoder%d.%d\n", port->id, i); @@ -1226,4 +1214,71 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, return 0; } -EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_decoders, "CXL"); + +/** + * __devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders + * @port: CXL port context + * + * Return 0 or -errno on error + */ +int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port) +{ + struct cxl_hdm *cxlhdm; + + if (is_cxl_root(port) || is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + cxlhdm = devm_cxl_setup_hdm(port, NULL); + if (!IS_ERR(cxlhdm)) + return devm_cxl_enumerate_decoders(cxlhdm, NULL); + + if (PTR_ERR(cxlhdm) != -ENODEV) { + dev_err(&port->dev, "Failed to map HDM decoder capability\n"); + return PTR_ERR(cxlhdm); + } + + if (cxl_port_get_possible_dports(port) == 1) { + dev_dbg(&port->dev, "Fallback to passthrough decoder\n"); + return devm_cxl_add_passthrough_decoder(port); + } + + dev_err(&port->dev, "HDM decoder capability not found\n"); + return -ENXIO; +} +EXPORT_SYMBOL_NS_GPL(__devm_cxl_switch_port_decoders_setup, "CXL"); + +/** + * devm_cxl_endpoint_decoders_setup - allocate and setup endpoint decoders + * @port: CXL port context + * + * Return 0 or -errno on error + */ +int devm_cxl_endpoint_decoders_setup(struct cxl_port *port) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + struct cxl_endpoint_dvsec_info info = { .port = port }; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_hdm *cxlhdm; + int rc; + + if (!is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + rc = cxl_dvsec_rr_decode(cxlds, &info); + if (rc < 0) + return rc; + + cxlhdm = devm_cxl_setup_hdm(port, &info); + if (IS_ERR(cxlhdm)) { + if (PTR_ERR(cxlhdm) == -ENODEV) + dev_err(&port->dev, "HDM decoder registers not found\n"); + return PTR_ERR(cxlhdm); + } + + rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info); + if (rc) + return rc; + + return devm_cxl_enumerate_decoders(cxlhdm, &info); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_endpoint_decoders_setup, "CXL"); diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index c569e00a511f..e370d733e440 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -200,6 +200,14 @@ static ssize_t security_erase_store(struct device *dev, static struct device_attribute dev_attr_security_erase = __ATTR(erase, 0200, NULL, security_erase_store); +bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd, + enum poison_cmd_enabled_bits cmd) +{ + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + + return test_bit(cmd, mds->poison.enabled_cmds); +} + static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; @@ -276,7 +284,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa) return 0; } -int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) +int cxl_inject_poison_locked(struct cxl_memdev *cxlmd, u64 dpa) { struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_inject_poison inject; @@ -288,13 +296,8 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) - return rc; - - ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) - return rc; + lockdep_assert_held(&cxl_rwsem.dpa); + 
lockdep_assert_held(&cxl_rwsem.region); rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) @@ -324,9 +327,24 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) return 0; } + +int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) +{ + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + return cxl_inject_poison_locked(cxlmd, dpa); +} EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, "CXL"); -int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) +int cxl_clear_poison_locked(struct cxl_memdev *cxlmd, u64 dpa) { struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_clear_poison clear; @@ -338,13 +356,8 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) - return rc; - - ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) - return rc; + lockdep_assert_held(&cxl_rwsem.dpa); + lockdep_assert_held(&cxl_rwsem.region); rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) @@ -383,6 +396,21 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) return 0; } + +int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) +{ + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + return cxl_clear_poison_locked(cxlmd, dpa); +} EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, "CXL"); static struct attribute *cxl_memdev_attributes[] = { diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index b50551601c2e..18825e1505d6 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -24,6 +24,53 @@ static unsigned short media_ready_timeout = 60; module_param(media_ready_timeout, ushort, 0644); MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready"); +static int pci_get_port_num(struct pci_dev *pdev) +{ + u32 lnkcap; + int type; + + type = pci_pcie_type(pdev); + if (type != PCI_EXP_TYPE_DOWNSTREAM && type != PCI_EXP_TYPE_ROOT_PORT) + return -EINVAL; + + if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, + &lnkcap)) + return -ENXIO; + + return FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); +} + +/** + * __devm_cxl_add_dport_by_dev - allocate a dport by dport device + * @port: cxl_port that hosts the dport + * @dport_dev: 'struct device' of the dport + * + * Returns the allocated dport on success or ERR_PTR() of -errno on error + */ +struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_register_map map; + struct pci_dev *pdev; + int port_num, rc; + + if (!dev_is_pci(dport_dev)) + return ERR_PTR(-EINVAL); + + pdev = to_pci_dev(dport_dev); + port_num = pci_get_port_num(pdev); + if (port_num < 0) + return ERR_PTR(port_num); + + rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + if (rc) + return ERR_PTR(rc); + + device_lock_assert(&port->dev); + return devm_cxl_add_dport(port, dport_dev, port_num, map.resource); +} +EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL"); + struct cxl_walk_context { struct pci_bus *bus; struct cxl_port *port; @@ -1169,3 
+1216,45 @@ int cxl_gpf_port_setup(struct cxl_dport *dport) return 0; } + +static int count_dports(struct pci_dev *pdev, void *data) +{ + struct cxl_walk_context *ctx = data; + int type = pci_pcie_type(pdev); + + if (pdev->bus != ctx->bus) + return 0; + if (!pci_is_pcie(pdev)) + return 0; + if (type != ctx->type) + return 0; + + ctx->count++; + return 0; +} + +int cxl_port_get_possible_dports(struct cxl_port *port) +{ + struct pci_bus *bus = cxl_port_to_pci_bus(port); + struct cxl_walk_context ctx; + int type; + + if (!bus) { + dev_err(&port->dev, "No PCI bus found for port %s\n", + dev_name(&port->dev)); + return -ENXIO; + } + + if (pci_is_root_bus(bus)) + type = PCI_EXP_TYPE_ROOT_PORT; + else + type = PCI_EXP_TYPE_DOWNSTREAM; + + ctx = (struct cxl_walk_context) { + .bus = bus, + .type = type, + }; + pci_walk_bus(bus, count_dports, &ctx); + + return ctx.count; +} diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 29197376b18e..d5f71eb1ade8 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -33,6 +33,15 @@ static DEFINE_IDA(cxl_port_ida); static DEFINE_XARRAY(cxl_root_buses); +/* + * The terminal device in PCI is NULL and @platform_bus + * for platform devices (for cxl_test) + */ +static bool is_cxl_host_bridge(struct device *dev) +{ + return (!dev || dev == &platform_bus); +} + int cxl_num_decoders_committed(struct cxl_port *port) { lockdep_assert_held(&cxl_rwsem.region); @@ -450,6 +459,7 @@ static void cxl_root_decoder_release(struct device *dev) if (atomic_read(&cxlrd->region_id) >= 0) memregion_free(atomic_read(&cxlrd->region_id)); __cxl_decoder_release(&cxlrd->cxlsd.cxld); + kfree(cxlrd->ops); kfree(cxlrd); } @@ -740,6 +750,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, xa_init(&port->dports); xa_init(&port->endpoints); xa_init(&port->regions); + port->component_reg_phys = CXL_RESOURCE_NONE; device_initialize(dev); lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth); @@ -858,9 +869,7 @@ static int cxl_port_add(struct cxl_port *port, if (rc) return rc; - rc = cxl_port_setup_regs(port, component_reg_phys); - if (rc) - return rc; + port->component_reg_phys = component_reg_phys; } else { rc = dev_set_name(dev, "root%d", port->id); if (rc) @@ -1191,6 +1200,18 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, cxl_debugfs_create_dport_dir(dport); + /* + * Setup port register if this is the first dport showed up. Having + * a dport also means that there is at least 1 active link. + */ + if (port->nr_dports == 1 && + port->component_reg_phys != CXL_RESOURCE_NONE) { + rc = cxl_port_setup_regs(port, port->component_reg_phys); + if (rc) + return ERR_PTR(rc); + port->component_reg_phys = CXL_RESOURCE_NONE; + } + return dport; } @@ -1348,21 +1369,6 @@ static struct cxl_port *find_cxl_port(struct device *dport_dev, return port; } -static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port, - struct device *dport_dev, - struct cxl_dport **dport) -{ - struct cxl_find_port_ctx ctx = { - .dport_dev = dport_dev, - .parent_port = parent_port, - .dport = dport, - }; - struct cxl_port *port; - - port = __find_cxl_port(&ctx); - return port; -} - /* * All users of grandparent() are using it to walk PCIe-like switch port * hierarchy. A PCIe switch is comprised of a bridge device representing the @@ -1423,7 +1429,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, "CXL"); * through ->remove(). This "bottom-up" removal selectively removes individual * child ports manually. 
@@ -1348,21 +1369,6 @@ static struct cxl_port *find_cxl_port(struct device *dport_dev, return port; } -static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port, - struct device *dport_dev, - struct cxl_dport **dport) -{ - struct cxl_find_port_ctx ctx = { - .dport_dev = dport_dev, - .parent_port = parent_port, - .dport = dport, - }; - struct cxl_port *port; - - port = __find_cxl_port(&ctx); - return port; -} - /* * All users of grandparent() are using it to walk PCIe-like switch port * hierarchy. A PCIe switch is comprised of a bridge device representing the @@ -1423,7 +1429,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, "CXL"); * through ->remove(). This "bottom-up" removal selectively removes individual * child ports manually. This depends on devm_cxl_add_port() to not change its * devm action registration order, and for dports to have already been - * destroyed by reap_dports(). + * destroyed by del_dports(). */ static void delete_switch_port(struct cxl_port *port) { @@ -1432,18 +1438,24 @@ static void delete_switch_port(struct cxl_port *port) devm_release_action(port->dev.parent, unregister_port, port); } -static void reap_dports(struct cxl_port *port) +static void del_dport(struct cxl_dport *dport) +{ + struct cxl_port *port = dport->port; + + devm_release_action(&port->dev, cxl_dport_unlink, dport); + devm_release_action(&port->dev, cxl_dport_remove, dport); + devm_kfree(&port->dev, dport); +} + +static void del_dports(struct cxl_port *port) { struct cxl_dport *dport; unsigned long index; device_lock_assert(&port->dev); - xa_for_each(&port->dports, index, dport) { - devm_release_action(&port->dev, cxl_dport_unlink, dport); - devm_release_action(&port->dev, cxl_dport_remove, dport); - devm_kfree(&port->dev, dport); - } + xa_for_each(&port->dports, index, dport) + del_dport(dport); } struct detach_ctx { @@ -1501,7 +1513,7 @@ static void cxl_detach_ep(void *data) */ died = true; port->dead = true; - reap_dports(port); + del_dports(port); } device_unlock(&port->dev); @@ -1532,16 +1544,157 @@ static resource_size_t find_component_registers(struct device *dev) return map.resource; } +static int match_port_by_uport(struct device *dev, const void *data) +{ + const struct device *uport_dev = data; + struct cxl_port *port; + + if (!is_cxl_port(dev)) + return 0; + + port = to_cxl_port(dev); + return uport_dev == port->uport_dev; +} + +/* + * Function takes a device reference on the port device. Caller should do a + * put_device() when done. + */ +static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev) +{ + struct device *dev; + + dev = bus_find_device(&cxl_bus_type, NULL, uport_dev, match_port_by_uport); + if (dev) + return to_cxl_port(dev); + return NULL; +} + +static int update_decoder_targets(struct device *dev, void *data) +{ + struct cxl_dport *dport = data; + struct cxl_switch_decoder *cxlsd; + struct cxl_decoder *cxld; + int i; + + if (!is_switch_decoder(dev)) + return 0; + + cxlsd = to_cxl_switch_decoder(dev); + cxld = &cxlsd->cxld; + guard(rwsem_write)(&cxl_rwsem.region); + + for (i = 0; i < cxld->interleave_ways; i++) { + if (cxld->target_map[i] == dport->port_id) { + cxlsd->target[i] = dport; + dev_dbg(dev, "dport%d found in target list, index %d\n", + dport->port_id, i); + return 1; + } + } + + return 0; +}
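The DEFINE_FREE() wrapper that follows gives del_dport() scope-based cleanup semantics. The shape of the pattern, sketched on a hypothetical object type (widget_create()/widget_validate()/widget_destroy() are stand-ins, not real helpers; the __free()/no_free_ptr() machinery is the same one used in this hunk):

DEFINE_FREE(free_widget, struct widget *,
	    if (!IS_ERR_OR_NULL(_T)) widget_destroy(_T))

static struct widget *widget_setup(struct device *dev)
{
	/* Destroyed automatically on any early return below */
	struct widget *w __free(free_widget) = widget_create(dev);

	if (IS_ERR(w))
		return w;
	if (widget_validate(w))
		return ERR_PTR(-EINVAL);

	/* Success: transfer ownership to the caller, defeating cleanup */
	return no_free_ptr(w);
}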
+ +DEFINE_FREE(del_cxl_dport, struct cxl_dport *, if (!IS_ERR_OR_NULL(_T)) del_dport(_T)) +static struct cxl_dport *cxl_port_add_dport(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_dport *dport; + int rc; + + device_lock_assert(&port->dev); + if (!port->dev.driver) + return ERR_PTR(-ENXIO); + + dport = cxl_find_dport_by_dev(port, dport_dev); + if (dport) { + dev_dbg(&port->dev, "dport%d:%s already exists\n", + dport->port_id, dev_name(dport_dev)); + return ERR_PTR(-EBUSY); + } + + struct cxl_dport *new_dport __free(del_cxl_dport) = + devm_cxl_add_dport_by_dev(port, dport_dev); + if (IS_ERR(new_dport)) + return new_dport; + + cxl_switch_parse_cdat(new_dport); + + if (ida_is_empty(&port->decoder_ida)) { + rc = devm_cxl_switch_port_decoders_setup(port); + if (rc) + return ERR_PTR(rc); + dev_dbg(&port->dev, "first dport%d:%s added with decoders\n", + new_dport->port_id, dev_name(dport_dev)); + return no_free_ptr(new_dport); + } + + /* New dport added, update the decoder targets */ + device_for_each_child(&port->dev, new_dport, update_decoder_targets); + + dev_dbg(&port->dev, "dport%d:%s added\n", new_dport->port_id, + dev_name(dport_dev)); + + return no_free_ptr(new_dport); +} + +static struct cxl_dport *devm_cxl_create_port(struct device *ep_dev, + struct cxl_port *parent_port, + struct cxl_dport *parent_dport, + struct device *uport_dev, + struct device *dport_dev) +{ + resource_size_t component_reg_phys; + + device_lock_assert(&parent_port->dev); + if (!parent_port->dev.driver) { + dev_warn(ep_dev, + "port %s:%s:%s disabled, failed to enumerate CXL.mem\n", + dev_name(&parent_port->dev), dev_name(uport_dev), + dev_name(dport_dev)); + return ERR_PTR(-ENXIO); + } + + struct cxl_port *port __free(put_cxl_port) = + find_cxl_port_by_uport(uport_dev); + if (!port) { + component_reg_phys = find_component_registers(uport_dev); + port = devm_cxl_add_port(&parent_port->dev, uport_dev, + component_reg_phys, parent_dport); + if (IS_ERR(port)) + return ERR_CAST(port); + + /* + * Retry to make sure a port is found. A port device + * reference is taken. + */ + port = find_cxl_port_by_uport(uport_dev); + if (!port) + return ERR_PTR(-ENODEV); + + dev_dbg(ep_dev, "created port %s:%s\n", + dev_name(&port->dev), dev_name(port->uport_dev)); + } else { + /* + * Port was created right before this function was + * called. Signal the caller to deal with it. + */ + return ERR_PTR(-EAGAIN); + } + + guard(device)(&port->dev); + return cxl_port_add_dport(port, dport_dev); +} + static int add_port_attach_ep(struct cxl_memdev *cxlmd, struct device *uport_dev, struct device *dport_dev) { struct device *dparent = grandparent(dport_dev); struct cxl_dport *dport, *parent_dport; - resource_size_t component_reg_phys; int rc; - if (!dparent) { + if (is_cxl_host_bridge(dparent)) { /* * The iteration reached the topology root without finding the * CXL-root 'cxl_port' on a previous iteration, fail for now to @@ -1553,42 +1706,31 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, } struct cxl_port *parent_port __free(put_cxl_port) = - find_cxl_port(dparent, &parent_dport); + find_cxl_port_by_uport(dparent->parent); if (!parent_port) { /* iterate to create this parent_port */ return -EAGAIN; } - /* - * Definition with __free() here to keep the sequence of - * dereferencing the device of the port before the parent_port releasing.
- */ - struct cxl_port *port __free(put_cxl_port) = NULL; scoped_guard(device, &parent_port->dev) { - if (!parent_port->dev.driver) { - dev_warn(&cxlmd->dev, - "port %s:%s disabled, failed to enumerate CXL.mem\n", - dev_name(&parent_port->dev), dev_name(uport_dev)); - return -ENXIO; + parent_dport = cxl_find_dport_by_dev(parent_port, dparent); + if (!parent_dport) { + parent_dport = cxl_port_add_dport(parent_port, dparent); + if (IS_ERR(parent_dport)) + return PTR_ERR(parent_dport); } - port = find_cxl_port_at(parent_port, dport_dev, &dport); - if (!port) { - component_reg_phys = find_component_registers(uport_dev); - port = devm_cxl_add_port(&parent_port->dev, uport_dev, - component_reg_phys, parent_dport); - if (IS_ERR(port)) - return PTR_ERR(port); - - /* retry find to pick up the new dport information */ - port = find_cxl_port_at(parent_port, dport_dev, &dport); - if (!port) - return -ENXIO; + dport = devm_cxl_create_port(&cxlmd->dev, parent_port, + parent_dport, uport_dev, + dport_dev); + if (IS_ERR(dport)) { + /* Port already exists, restart iteration */ + if (PTR_ERR(dport) == -EAGAIN) + return 0; + return PTR_ERR(dport); } } - dev_dbg(&cxlmd->dev, "add to new port %s:%s\n", - dev_name(&port->dev), dev_name(port->uport_dev)); rc = cxl_add_ep(dport, &cxlmd->dev); if (rc == -EBUSY) { /* @@ -1601,6 +1743,25 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, return rc; } +static struct cxl_dport *find_or_add_dport(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_dport *dport; + + device_lock_assert(&port->dev); + dport = cxl_find_dport_by_dev(port, dport_dev); + if (!dport) { + dport = cxl_port_add_dport(port, dport_dev); + if (IS_ERR(dport)) + return dport; + + /* New dport added, restart iteration */ + return ERR_PTR(-EAGAIN); + } + + return dport; +} + int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) { struct device *dev = &cxlmd->dev; @@ -1629,11 +1790,7 @@ retry: struct device *uport_dev; struct cxl_dport *dport; - /* - * The terminal "grandparent" in PCI is NULL and @platform_bus - * for platform devices - */ - if (!dport_dev || dport_dev == &platform_bus) + if (is_cxl_host_bridge(dport_dev)) return 0; uport_dev = dport_dev->parent; @@ -1647,12 +1804,26 @@ retry: dev_name(iter), dev_name(dport_dev), dev_name(uport_dev)); struct cxl_port *port __free(put_cxl_port) = - find_cxl_port(dport_dev, &dport); + find_cxl_port_by_uport(uport_dev); if (port) { dev_dbg(&cxlmd->dev, "found already registered port %s:%s\n", dev_name(&port->dev), dev_name(port->uport_dev)); + + /* + * RP port enumerated by cxl_acpi without dport will + * have the dport added here. 
+ */ + scoped_guard(device, &port->dev) { + dport = find_or_add_dport(port, dport_dev); + if (IS_ERR(dport)) { + if (PTR_ERR(dport) == -EAGAIN) + goto retry; + return PTR_ERR(dport); + } + } + rc = cxl_add_ep(dport, &cxlmd->dev); /* @@ -1704,24 +1875,24 @@ struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd, EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, "CXL"); static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, - struct cxl_port *port, int *target_map) + struct cxl_port *port) { + struct cxl_decoder *cxld = &cxlsd->cxld; int i; - if (!target_map) - return 0; - device_lock_assert(&port->dev); if (xa_empty(&port->dports)) - return -EINVAL; + return 0; guard(rwsem_write)(&cxl_rwsem.region); for (i = 0; i < cxlsd->cxld.interleave_ways; i++) { - struct cxl_dport *dport = find_dport(port, target_map[i]); + struct cxl_dport *dport = find_dport(port, cxld->target_map[i]); - if (!dport) - return -ENXIO; + if (!dport) { + /* dport may be activated later */ + continue; + } cxlsd->target[i] = dport; } @@ -1910,9 +2081,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, "CXL"); /** * cxl_decoder_add_locked - Add a decoder with targets * @cxld: The cxl decoder allocated by cxl_<type>_decoder_alloc() - * @target_map: A list of downstream ports that this decoder can direct memory - * traffic to. These numbers should correspond with the port number - * in the PCIe Link Capabilities structure. * * Certain types of decoders may not have any targets. The main example of this * is an endpoint device. A more awkward example is a hostbridge whose root @@ -1926,7 +2094,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, "CXL"); * Return: Negative error code if the decoder wasn't properly configured; else * returns 0. */ -int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) +int cxl_decoder_add_locked(struct cxl_decoder *cxld) { struct cxl_port *port; struct device *dev; @@ -1947,7 +2115,7 @@ int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) if (!is_endpoint_decoder(dev)) { struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(dev); - rc = decoder_populate_targets(cxlsd, port, target_map); + rc = decoder_populate_targets(cxlsd, port); if (rc && (cxld->flags & CXL_DECODER_F_ENABLE)) { dev_err(&port->dev, "Failed to populate active decoder targets\n"); @@ -1966,9 +2134,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, "CXL"); /** * cxl_decoder_add - Add a decoder with targets * @cxld: The cxl decoder allocated by cxl_<type>_decoder_alloc() - * @target_map: A list of downstream ports that this decoder can direct memory - * traffic to. These numbers should correspond with the port number - * in the PCIe Link Capabilities structure. * * This is the unlocked variant of cxl_decoder_add_locked(). * See cxl_decoder_add_locked(). @@ -1976,7 +2141,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, "CXL"); * Context: Process context. Takes and releases the device lock of the port that * owns the @cxld. 
*/ -int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) +int cxl_decoder_add(struct cxl_decoder *cxld) { struct cxl_port *port; @@ -1989,7 +2154,7 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) port = to_cxl_port(cxld->dev.parent); guard(device)(&port->dev); - return cxl_decoder_add_locked(cxld, target_map); + return cxl_decoder_add_locked(cxld); } EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL"); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 71cc42d05248..e14c1d305b22 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2,6 +2,7 @@ /* Copyright(c) 2022 Intel Corporation. All rights reserved. */ #include <linux/memregion.h> #include <linux/genalloc.h> +#include <linux/debugfs.h> #include <linux/device.h> #include <linux/module.h> #include <linux/memory.h> @@ -10,6 +11,7 @@ #include <linux/sort.h> #include <linux/idr.h> #include <linux/memory-tiers.h> +#include <linux/string_choices.h> #include <cxlmem.h> #include <cxl.h> #include "core.h" @@ -30,6 +32,12 @@ * 3. Decoder targets */ +/* + * Nodemask with a bit set per node once the access_coordinates for that + * node have been updated by the CXL memory hotplug notifier. + */ +static nodemask_t nodemask_region_seen = NODE_MASK_NONE; + static struct cxl_region *to_cxl_region(struct device *dev); #define __ACCESS_ATTR_RO(_level, _name) { \ @@ -1468,9 +1476,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, dev_name(port->uport_dev), dev_name(&port->dev), __func__, cxld->interleave_ways, cxld->interleave_granularity, - (cxld->flags & CXL_DECODER_F_ENABLE) ? - "enabled" : - "disabled", + str_enabled_disabled(cxld->flags & CXL_DECODER_F_ENABLE), cxld->hpa_range.start, cxld->hpa_range.end); return -ENXIO; } @@ -1510,8 +1516,10 @@ add_target: cxl_rr->nr_targets_set); return -ENXIO; } - } else + } else { cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; + cxlsd->cxld.target_map[cxl_rr->nr_targets_set] = ep->dport->port_id; + } inc = 1; out_target_set: cxl_rr->nr_targets_set += inc; @@ -2442,14 +2450,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { if (cxlr->coord[i].read_bandwidth) { - rc = 0; - if (cxl_need_node_perf_attrs_update(nid)) - node_set_perf_attrs(nid, &cxlr->coord[i], i); - else - rc = cxl_update_hmat_access_coordinates(nid, cxlr, i); - - if (rc == 0) - cset++; + node_update_perf_attrs(nid, &cxlr->coord[i], i); + cset++; } } @@ -2487,6 +2489,10 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, if (nid != region_nid) return NOTIFY_DONE; + /* No action needed if node bit already set */ + if (node_test_and_set(nid, nodemask_region_seen)) + return NOTIFY_DONE; + if (!cxl_region_update_coordinates(cxlr, nid)) return NOTIFY_DONE; @@ -2918,6 +2924,16 @@ static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos) return false; } +static bool has_hpa_to_spa(struct cxl_root_decoder *cxlrd) +{ + return cxlrd->ops && cxlrd->ops->hpa_to_spa; +} + +static bool has_spa_to_hpa(struct cxl_root_decoder *cxlrd) +{ + return cxlrd->ops && cxlrd->ops->spa_to_hpa; +} + u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa) { @@ -2972,8 +2988,8 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, hpa = hpa_offset + p->res->start + p->cache_size; /* Root decoder translation overrides typical modulo decode */ - if (cxlrd->hpa_to_spa) - hpa = cxlrd->hpa_to_spa(cxlrd, hpa); + if (has_hpa_to_spa(cxlrd)) + hpa = 
cxlrd->ops->hpa_to_spa(cxlrd, hpa); if (!cxl_resource_contains_addr(p->res, hpa)) { dev_dbg(&cxlr->dev, @@ -2982,12 +2998,107 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, } /* Simple chunk check, by pos & gran, only applies to modulo decodes */ - if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) + if (!has_hpa_to_spa(cxlrd) && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) return ULLONG_MAX; return hpa; } +struct dpa_result { + struct cxl_memdev *cxlmd; + u64 dpa; +}; + +static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset, + struct dpa_result *result) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_endpoint_decoder *cxled; + u64 hpa, hpa_offset, dpa_offset; + u64 bits_upper, bits_lower; + u64 shifted, rem, temp; + u16 eig = 0; + u8 eiw = 0; + int pos; + + lockdep_assert_held(&cxl_rwsem.region); + lockdep_assert_held(&cxl_rwsem.dpa); + + /* Input validation ensures valid ways and gran */ + granularity_to_eig(p->interleave_granularity, &eig); + ways_to_eiw(p->interleave_ways, &eiw); + + /* + * If the root decoder has SPA to CXL HPA callback, use it. Otherwise + * CXL HPA is assumed to equal SPA. + */ + if (has_spa_to_hpa(cxlrd)) { + hpa = cxlrd->ops->spa_to_hpa(cxlrd, p->res->start + offset); + hpa_offset = hpa - p->res->start; + } else { + hpa_offset = offset; + } + /* + * Interleave position: CXL Spec 3.2 Section 8.2.4.20.13 + * eiw < 8 + * Position is in the IW bits at HPA_OFFSET[IG+8+IW-1:IG+8]. + * Per spec "remove IW bits starting with bit position IG+8" + * eiw >= 8 + * Position is not explicitly stored in HPA_OFFSET bits. It is + * derived from the modulo operation of the upper bits using + * the total number of interleave ways. 
+ */ + if (eiw < 8) { + pos = (hpa_offset >> (eig + 8)) & GENMASK(eiw - 1, 0); + } else { + shifted = hpa_offset >> (eig + 8); + div64_u64_rem(shifted, p->interleave_ways, &rem); + pos = rem; + } + if (pos < 0 || pos >= p->nr_targets) { + dev_dbg(&cxlr->dev, "Invalid position %d for %d targets\n", + pos, p->nr_targets); + return -ENXIO; + } + + /* + * DPA offset: CXL Spec 3.2 Section 8.2.4.20.13 + * Lower bits [IG+7:0] pass through unchanged + * (eiw < 8) + * Per spec: DPAOffset[51:IG+8] = (HPAOffset[51:IG+IW+8] >> IW) + * Clear the position bits to isolate upper section, then + * reverse the left shift by eiw that occurred during DPA->HPA + * (eiw >= 8) + * Per spec: DPAOffset[51:IG+8] = HPAOffset[51:IG+IW] / 3 + * Extract upper bits from the correct bit range and divide by 3 + * to recover the original DPA upper bits + */ + bits_lower = hpa_offset & GENMASK_ULL(eig + 7, 0); + if (eiw < 8) { + temp = hpa_offset & ~((u64)GENMASK(eig + eiw + 8 - 1, 0)); + dpa_offset = temp >> eiw; + } else { + bits_upper = div64_u64(hpa_offset >> (eig + eiw), 3); + dpa_offset = bits_upper << (eig + 8); + } + dpa_offset |= bits_lower; + + /* Look-up and return the result: a memdev and a DPA */ + for (int i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + if (cxled->pos != pos) + continue; + result->cxlmd = cxled_to_memdev(cxled); + result->dpa = cxl_dpa_resource_start(cxled) + dpa_offset; + + return 0; + } + dev_err(&cxlr->dev, "No device found for position %d\n", pos); + + return -ENXIO; +}
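The position/offset recovery above is easier to sanity-check with numbers. A standalone userspace sketch of the eiw < 8 branch (plain C, open-coding GENMASK; the values are chosen purely for illustration):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t hpa_offset = 0x500;	/* region offset under test */
	unsigned int eig = 0;		/* 256-byte granularity */
	unsigned int eiw = 1;		/* 2-way interleave */

	/* Position: HPA_OFFSET[eig+8+eiw-1 : eig+8] -> chunk 5, position 1 */
	unsigned int pos = (hpa_offset >> (eig + 8)) & ((1u << eiw) - 1);

	/* Lower bits [eig+7:0] pass through unchanged */
	uint64_t lower = hpa_offset & ((1ull << (eig + 8)) - 1);

	/* Clear the position bits, then undo the left shift by eiw */
	uint64_t upper = (hpa_offset & ~((1ull << (eig + eiw + 8)) - 1)) >> eiw;

	assert(pos == 1);
	/* DPA chunk 2 at position 1 of 2 maps back to HPA chunk 5 */
	assert((upper | lower) == 0x200);
	return 0;
}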
+ +static struct lock_class_key cxl_pmem_region_key; static int cxl_pmem_region_alloc(struct cxl_region *cxlr) @@ -3542,6 +3653,105 @@ static void shutdown_notifiers(void *_cxlr) unregister_mt_adistance_algorithm(&cxlr->adist_notifier); } +static void remove_debugfs(void *dentry) +{ + debugfs_remove_recursive(dentry); +} + +static int validate_region_offset(struct cxl_region *cxlr, u64 offset) +{ + struct cxl_region_params *p = &cxlr->params; + resource_size_t region_size; + u64 hpa; + + if (offset < p->cache_size) { + dev_err(&cxlr->dev, + "Offset %#llx is within extended linear cache %pa\n", + offset, &p->cache_size); + return -EINVAL; + } + + region_size = resource_size(p->res); + if (offset >= region_size) { + dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pa\n", + offset, &region_size); + return -EINVAL; + } + + hpa = p->res->start + offset; + if (hpa < p->res->start || hpa > p->res->end) { + dev_err(&cxlr->dev, "HPA %#llx not in region %pr\n", hpa, + p->res); + return -EINVAL; + } + + return 0; +} + +static int cxl_region_debugfs_poison_inject(void *data, u64 offset) +{ + struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL }; + struct cxl_region *cxlr = data; + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + if (validate_region_offset(cxlr, offset)) + return -EINVAL; + + rc = region_offset_to_dpa_result(cxlr, offset, &result); + if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) { + dev_dbg(&cxlr->dev, + "Failed to resolve DPA for region offset %#llx rc %d\n", + offset, rc); + + return rc ? rc : -EINVAL; + } + + return cxl_inject_poison_locked(result.cxlmd, result.dpa); +} + +DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL, + cxl_region_debugfs_poison_inject, "%llx\n"); + +static int cxl_region_debugfs_poison_clear(void *data, u64 offset) +{ + struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL }; + struct cxl_region *cxlr = data; + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + if (validate_region_offset(cxlr, offset)) + return -EINVAL; + + rc = region_offset_to_dpa_result(cxlr, offset, &result); + if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) { + dev_dbg(&cxlr->dev, + "Failed to resolve DPA for region offset %#llx rc %d\n", + offset, rc); + + return rc ? rc : -EINVAL; + } + + return cxl_clear_poison_locked(result.cxlmd, result.dpa); +} + +DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, + cxl_region_debugfs_poison_clear, "%llx\n"); + static int cxl_region_can_probe(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; @@ -3571,6 +3781,7 @@ static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; + bool poison_supported = true; int rc; rc = cxl_region_can_probe(cxlr); @@ -3594,6 +3805,31 @@ static int cxl_region_probe(struct device *dev) if (rc) return rc; + /* Create poison attributes if all memdevs support the capabilities */ + for (int i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + + if (!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_INJECT) || + !cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_CLEAR)) { + poison_supported = false; + break; + } + } + + if (poison_supported) { + struct dentry *dentry; + + dentry = cxl_debugfs_create_dir(dev_name(dev)); + debugfs_create_file("inject_poison", 0200, dentry, cxlr, + &cxl_poison_inject_fops); + debugfs_create_file("clear_poison", 0200, dentry, cxlr, + &cxl_poison_clear_fops); + rc = devm_add_action_or_reset(dev, remove_debugfs, dentry); + if (rc) + return rc; + } + switch (cxlr->mode) { case CXL_PARTMODE_PMEM: rc = devm_cxl_region_edac_register(cxlr); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 847e37be42c4..231ddccf8977 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -357,6 +357,9 @@ enum cxl_decoder_type { * @target_type: accelerator vs expander (type2 vs type3) selector * @region: currently assigned region for this decoder * @flags: memory type capabilities and locking + * @target_map: cached copy of hardware port-id list, available at init + * before all @dport objects have been instantiated. While + * dport id is 8-bit, CFMWS interleave targets are 32 bits.
* @commit: device/decoder-type specific callback to commit settings to hw * @reset: device/decoder-type specific callback to reset hw settings */ @@ -369,6 +372,7 @@ struct cxl_decoder { enum cxl_decoder_type target_type; struct cxl_region *region; unsigned long flags; + u32 target_map[CXL_DECODER_MAX_INTERLEAVE]; int (*commit)(struct cxl_decoder *cxld); void (*reset)(struct cxl_decoder *cxld); }; @@ -419,27 +423,35 @@ struct cxl_switch_decoder { }; struct cxl_root_decoder; -typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); +/** + * struct cxl_rd_ops - CXL root decoder callback operations + * @hpa_to_spa: Convert host physical address to system physical address + * @spa_to_hpa: Convert system physical address to host physical address + */ +struct cxl_rd_ops { + u64 (*hpa_to_spa)(struct cxl_root_decoder *cxlrd, u64 hpa); + u64 (*spa_to_hpa)(struct cxl_root_decoder *cxlrd, u64 spa); +}; /** * struct cxl_root_decoder - Static platform CXL address decoder * @res: host / parent resource for region allocations * @cache_size: extended linear cache size if exists, otherwise zero. * @region_id: region id for next region provisioning event - * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address * @platform_data: platform specific configuration data * @range_lock: sync region autodiscovery by address range * @qos_class: QoS performance class cookie + * @ops: CXL root decoder operations * @cxlsd: base cxl switch decoder */ struct cxl_root_decoder { struct resource *res; resource_size_t cache_size; atomic_t region_id; - cxl_hpa_to_spa_fn hpa_to_spa; void *platform_data; struct mutex range_lock; int qos_class; + struct cxl_rd_ops *ops; struct cxl_switch_decoder cxlsd; }; @@ -595,6 +607,7 @@ struct cxl_dax_region { * @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs * @pci_latency: Upstream latency in picoseconds + * @component_reg_phys: Physical address of component register */ struct cxl_port { struct device dev; @@ -618,6 +631,7 @@ struct cxl_port { } cdat; bool cdat_available; long pci_latency; + resource_size_t component_reg_phys; }; /** @@ -781,9 +795,9 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); -int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); +int cxl_decoder_add(struct cxl_decoder *cxld); struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port); -int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map); +int cxl_decoder_add_locked(struct cxl_decoder *cxld); int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld); static inline int cxl_root_decoder_autoremove(struct device *host, struct cxl_root_decoder *cxlrd) @@ -806,12 +820,10 @@ struct cxl_endpoint_dvsec_info { struct range dvsec_range[2]; }; -struct cxl_hdm; -struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info); -int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info); -int devm_cxl_add_passthrough_decoder(struct cxl_port *port); +int devm_cxl_switch_port_decoders_setup(struct cxl_port *port); +int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port); +int devm_cxl_endpoint_decoders_setup(struct cxl_port *port); + struct cxl_dev_state; int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, struct cxl_endpoint_dvsec_info *info); @@ 
-890,7 +902,7 @@ static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, #endif void cxl_endpoint_parse_cdat(struct cxl_port *port); -void cxl_switch_parse_cdat(struct cxl_port *port); +void cxl_switch_parse_cdat(struct cxl_dport *dport); int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord); @@ -905,6 +917,10 @@ void cxl_coordinates_combine(struct access_coordinate *out, struct access_coordinate *c2); bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); +struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); +struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); /* * Unit test builds overrides this to __weak, find the 'strong' version @@ -915,4 +931,21 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #endif u16 cxl_gpf_get_dvsec(struct device *dev); + +/* + * Declaration for functions that are mocked by cxl_test that are called by + * cxl_core. The respective functions are defined as __foo() and called by + * cxl_core as foo(). The macros below ensure that those functions exist + * as foo(). See tools/testing/cxl/cxl_core_exports.c and + * tools/testing/cxl/exports.h for setting up the mock functions. The dance + * is done to avoid a circular dependency where cxl_core calls a function that + * ends up being a mock function and goes to * cxl_test where it calls a + * cxl_core function. + */ +#ifndef CXL_TEST_ENABLE +#define DECLARE_TESTABLE(x) __##x +#define devm_cxl_add_dport_by_dev DECLARE_TESTABLE(devm_cxl_add_dport_by_dev) +#define devm_cxl_switch_port_decoders_setup DECLARE_TESTABLE(devm_cxl_switch_port_decoders_setup) +#endif + #endif /* __CXL_H__ */
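How the rename works in practice, compressed onto a hypothetical foo() (illustration only; the real users are the two functions #defined above):

#ifndef CXL_TEST_ENABLE
#define DECLARE_TESTABLE(x) __##x
#define foo DECLARE_TESTABLE(foo)	/* "foo" now preprocesses to "__foo" */
#endif

int __foo(void);	/* the 'strong' implementation in cxl_core */

static int example_caller(void)
{
	/*
	 * Production build: this compiles as __foo(), calling cxl_core
	 * directly. cxl_test build (CXL_TEST_ENABLE defined): the name
	 * stays foo() and resolves to the mock, with no recursion back
	 * into cxl_core.
	 */
	return foo();
}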
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 751478dfc410..434031a0c1f7 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -869,6 +869,8 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, int cxl_trigger_poison_list(struct cxl_memdev *cxlmd); int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa); int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa); +int cxl_inject_poison_locked(struct cxl_memdev *cxlmd, u64 dpa); +int cxl_clear_poison_locked(struct cxl_memdev *cxlmd, u64 dpa); #ifdef CONFIG_CXL_EDAC_MEM_FEATURES int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 54e219b0049e..7ae621e618e7 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -129,8 +129,6 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev) int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info); void read_cdat_data(struct cxl_port *port); void cxl_cor_error_detected(struct pci_dev *pdev); pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index cf32dc50b7a6..51c8f2f84717 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -59,55 +59,20 @@ static int discover_region(struct device *dev, void *unused) static int cxl_switch_port_probe(struct cxl_port *port) { - struct cxl_hdm *cxlhdm; - int rc; + /* Reset nr_dports for rebind of driver */ + port->nr_dports = 0; /* Cache the data early to ensure is_visible() works */ read_cdat_data(port); - rc = devm_cxl_port_enumerate_dports(port); - if (rc < 0) - return rc; - - cxl_switch_parse_cdat(port); - - cxlhdm = devm_cxl_setup_hdm(port, NULL); - if (!IS_ERR(cxlhdm)) - return devm_cxl_enumerate_decoders(cxlhdm, NULL); - - if (PTR_ERR(cxlhdm) != -ENODEV) { - dev_err(&port->dev, "Failed to map HDM decoder capability\n"); - return PTR_ERR(cxlhdm); - } - - if (rc == 1) { - dev_dbg(&port->dev, "Fallback to passthrough decoder\n"); - return devm_cxl_add_passthrough_decoder(port); - } - - dev_err(&port->dev, "HDM decoder capability not found\n"); - return -ENXIO; + return 0; } static int cxl_endpoint_port_probe(struct cxl_port *port) { - struct cxl_endpoint_dvsec_info info = { .port = port }; struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_hdm *cxlhdm; int rc; - rc = cxl_dvsec_rr_decode(cxlds, &info); - if (rc < 0) - return rc; - - cxlhdm = devm_cxl_setup_hdm(port, &info); - if (IS_ERR(cxlhdm)) { - if (PTR_ERR(cxlhdm) == -ENODEV) - dev_err(&port->dev, "HDM decoder registers not found\n"); - return PTR_ERR(cxlhdm); - } - /* Cache the data early to ensure is_visible() works */ read_cdat_data(port); cxl_endpoint_parse_cdat(port); @@ -117,11 +82,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) if (rc) return rc; - rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info); - if (rc) - return rc; - - rc = devm_cxl_enumerate_decoders(cxlhdm, &info); + rc = devm_cxl_endpoint_decoders_setup(port); if (rc) return rc; diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 9c1c546fe443..02bab136385e 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -3,7 +3,11 @@ #ifndef _IDXD_REGISTERS_H_ #define _IDXD_REGISTERS_H_ +#ifdef __KERNEL__ #include <uapi/linux/idxd.h> +#else +#include <linux/idxd.h> +#endif /* PCI Config */ #define PCI_DEVICE_ID_INTEL_DSA_GNRD 0x11fb diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index a180171087a8..12a4a4860a74 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -19,6 +19,8 @@ #define IOAT_DMA_DCA_ANY_CPU ~0 +int system_has_dca_enabled(struct pci_dev *pdev); + #define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, dma_dev) #define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->dev) #define to_pdev(ioat_chan) ((ioat_chan)->ioat_dma->pdev) diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index 79e4e4c09c18..0373c48520c9 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -63,9 +63,6 @@ #define IOAT_VER_3_3 0x33 /* Version 3.3 */ #define IOAT_VER_3_4 0x34 /* Version 3.4 */ - -int system_has_dca_enabled(struct pci_dev *pdev); - #define IOAT_DESC_SZ 64 struct ioat_dma_descriptor { diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 6d12c6ab9ea4..a61c6dc63c29 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -634,6 +634,13 @@ config RISCV_IMSIC select GENERIC_MSI_IRQ select IRQ_MSI_LIB +config RISCV_RPMI_SYSMSI + bool + depends on RISCV && MAILBOX + select IRQ_DOMAIN_HIERARCHY + select GENERIC_MSI_IRQ + default RISCV + config SIFIVE_PLIC bool depends on RISCV diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 93e3ced023bb..3de083f5484c 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -106,6 +106,7 @@ obj-$(CONFIG_RISCV_INTC) += irq-riscv-intc.o obj-$(CONFIG_RISCV_APLIC) += irq-riscv-aplic-main.o irq-riscv-aplic-direct.o obj-$(CONFIG_RISCV_APLIC_MSI) += irq-riscv-aplic-msi.o obj-$(CONFIG_RISCV_IMSIC) += irq-riscv-imsic-state.o irq-riscv-imsic-early.o irq-riscv-imsic-platform.o
+obj-$(CONFIG_RISCV_RPMI_SYSMSI) += irq-riscv-rpmi-sysmsi.o obj-$(CONFIG_SIFIVE_PLIC) += irq-sifive-plic.o obj-$(CONFIG_STARFIVE_JH8100_INTC) += irq-starfive-jh8100-intc.o obj-$(CONFIG_ACLINT_SSWI) += irq-aclint-sswi.o diff --git a/drivers/irqchip/irq-gic-v5.c b/drivers/irqchip/irq-gic-v5.c index 4bd224f359a7..41ef286c4d78 100644 --- a/drivers/irqchip/irq-gic-v5.c +++ b/drivers/irqchip/irq-gic-v5.c @@ -1062,16 +1062,9 @@ static void gicv5_set_cpuif_idbits(void) #ifdef CONFIG_KVM static struct gic_kvm_info gic_v5_kvm_info __initdata; -static bool __init gicv5_cpuif_has_gcie_legacy(void) -{ - u64 idr0 = read_sysreg_s(SYS_ICC_IDR0_EL1); - return !!FIELD_GET(ICC_IDR0_EL1_GCIE_LEGACY, idr0); -} - static void __init gic_of_setup_kvm_info(struct device_node *node) { gic_v5_kvm_info.type = GIC_V5; - gic_v5_kvm_info.has_gcie_v3_compat = gicv5_cpuif_has_gcie_legacy(); /* GIC Virtual CPU interface maintenance interrupt */ gic_v5_kvm_info.no_maint_irq_mask = false; diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c index 2709cacf4855..2c4c682627b8 100644 --- a/drivers/irqchip/irq-riscv-imsic-early.c +++ b/drivers/irqchip/irq-riscv-imsic-early.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "riscv-imsic: " fmt #include <linux/acpi.h> #include <linux/cpu.h> +#include <linux/export.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/io.h> @@ -233,6 +234,7 @@ struct fwnode_handle *imsic_acpi_get_fwnode(struct device *dev) { return imsic_acpi_fwnode; } +EXPORT_SYMBOL_GPL(imsic_acpi_get_fwnode); static int __init imsic_early_acpi_init(union acpi_subtable_headers *header, const unsigned long end) diff --git a/drivers/irqchip/irq-riscv-rpmi-sysmsi.c b/drivers/irqchip/irq-riscv-rpmi-sysmsi.c new file mode 100644 index 000000000000..5c74c561ce31 --- /dev/null +++ b/drivers/irqchip/irq-riscv-rpmi-sysmsi.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Ventana Micro Systems Inc. 
*/ + +#include <linux/acpi.h> +#include <linux/bits.h> +#include <linux/bug.h> +#include <linux/device.h> +#include <linux/device/devres.h> +#include <linux/dev_printk.h> +#include <linux/errno.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/irqchip/riscv-imsic.h> +#include <linux/mailbox_client.h> +#include <linux/mailbox/riscv-rpmi-message.h> +#include <linux/module.h> +#include <linux/msi.h> +#include <linux/of_irq.h> +#include <linux/platform_device.h> +#include <linux/types.h> + +struct rpmi_sysmsi_get_attrs_rx { + __le32 status; + __le32 sys_num_msi; + __le32 flag0; + __le32 flag1; +}; + +#define RPMI_SYSMSI_MSI_ATTRIBUTES_FLAG0_PREF_PRIV BIT(0) + +struct rpmi_sysmsi_set_msi_state_tx { + __le32 sys_msi_index; + __le32 sys_msi_state; +}; + +struct rpmi_sysmsi_set_msi_state_rx { + __le32 status; +}; + +#define RPMI_SYSMSI_MSI_STATE_ENABLE BIT(0) +#define RPMI_SYSMSI_MSI_STATE_PENDING BIT(1) + +struct rpmi_sysmsi_set_msi_target_tx { + __le32 sys_msi_index; + __le32 sys_msi_address_low; + __le32 sys_msi_address_high; + __le32 sys_msi_data; +}; + +struct rpmi_sysmsi_set_msi_target_rx { + __le32 status; +}; + +struct rpmi_sysmsi_priv { + struct device *dev; + struct mbox_client client; + struct mbox_chan *chan; + u32 nr_irqs; + u32 gsi_base; +}; + +static int rpmi_sysmsi_get_num_msi(struct rpmi_sysmsi_priv *priv) +{ + struct rpmi_sysmsi_get_attrs_rx rx; + struct rpmi_mbox_message msg; + int ret; + + rpmi_mbox_init_send_with_response(&msg, RPMI_SYSMSI_SRV_GET_ATTRIBUTES, + NULL, 0, &rx, sizeof(rx)); + ret = rpmi_mbox_send_message(priv->chan, &msg); + if (ret) + return ret; + if (rx.status) + return rpmi_to_linux_error(le32_to_cpu(rx.status)); + + return le32_to_cpu(rx.sys_num_msi); +} + +static int rpmi_sysmsi_set_msi_state(struct rpmi_sysmsi_priv *priv, + u32 sys_msi_index, u32 sys_msi_state) +{ + struct rpmi_sysmsi_set_msi_state_tx tx; + struct rpmi_sysmsi_set_msi_state_rx rx; + struct rpmi_mbox_message msg; + int ret; + + tx.sys_msi_index = cpu_to_le32(sys_msi_index); + tx.sys_msi_state = cpu_to_le32(sys_msi_state); + rpmi_mbox_init_send_with_response(&msg, RPMI_SYSMSI_SRV_SET_MSI_STATE, + &tx, sizeof(tx), &rx, sizeof(rx)); + ret = rpmi_mbox_send_message(priv->chan, &msg); + if (ret) + return ret; + if (rx.status) + return rpmi_to_linux_error(le32_to_cpu(rx.status)); + + return 0; +} + +static int rpmi_sysmsi_set_msi_target(struct rpmi_sysmsi_priv *priv, + u32 sys_msi_index, struct msi_msg *m) +{ + struct rpmi_sysmsi_set_msi_target_tx tx; + struct rpmi_sysmsi_set_msi_target_rx rx; + struct rpmi_mbox_message msg; + int ret; + + tx.sys_msi_index = cpu_to_le32(sys_msi_index); + tx.sys_msi_address_low = cpu_to_le32(m->address_lo); + tx.sys_msi_address_high = cpu_to_le32(m->address_hi); + tx.sys_msi_data = cpu_to_le32(m->data); + rpmi_mbox_init_send_with_response(&msg, RPMI_SYSMSI_SRV_SET_MSI_TARGET, + &tx, sizeof(tx), &rx, sizeof(rx)); + ret = rpmi_mbox_send_message(priv->chan, &msg); + if (ret) + return ret; + if (rx.status) + return rpmi_to_linux_error(le32_to_cpu(rx.status)); + + return 0; +} + +static void rpmi_sysmsi_irq_mask(struct irq_data *d) +{ + struct rpmi_sysmsi_priv *priv = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + int ret; + + ret = rpmi_sysmsi_set_msi_state(priv, hwirq, 0); + if (ret) + dev_warn(priv->dev, "Failed to mask hwirq %lu (error %d)\n", hwirq, ret); + irq_chip_mask_parent(d); +} + +static void rpmi_sysmsi_irq_unmask(struct irq_data *d) +{ + struct rpmi_sysmsi_priv *priv = irq_data_get_irq_chip_data(d); + 
irq_hw_number_t hwirq = irqd_to_hwirq(d); + int ret; + + irq_chip_unmask_parent(d); + ret = rpmi_sysmsi_set_msi_state(priv, hwirq, RPMI_SYSMSI_MSI_STATE_ENABLE); + if (ret) + dev_warn(priv->dev, "Failed to unmask hwirq %lu (error %d)\n", hwirq, ret); +} + +static void rpmi_sysmsi_write_msg(struct irq_data *d, struct msi_msg *msg) +{ + struct rpmi_sysmsi_priv *priv = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + int ret; + + /* For a zeroed MSI message, do nothing for now */ + if (!msg->address_hi && !msg->address_lo && !msg->data) + return; + + ret = rpmi_sysmsi_set_msi_target(priv, hwirq, msg); + if (ret) + dev_warn(priv->dev, "Failed to set target for hwirq %lu (error %d)\n", hwirq, ret); +} + +static void rpmi_sysmsi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) +{ + arg->desc = desc; + arg->hwirq = desc->data.icookie.value; +} + +static int rpmi_sysmsi_translate(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *hwirq, unsigned int *type) +{ + struct msi_domain_info *info = d->host_data; + struct rpmi_sysmsi_priv *priv = info->data; + + if (WARN_ON(fwspec->param_count < 1)) + return -EINVAL; + + /* For DT, gsi_base is always zero. */ + *hwirq = fwspec->param[0] - priv->gsi_base; + *type = IRQ_TYPE_NONE; + return 0; +} + +static const struct msi_domain_template rpmi_sysmsi_template = { + .chip = { + .name = "RPMI-SYSMSI", + .irq_mask = rpmi_sysmsi_irq_mask, + .irq_unmask = rpmi_sysmsi_irq_unmask, +#ifdef CONFIG_SMP + .irq_set_affinity = irq_chip_set_affinity_parent, +#endif + .irq_write_msi_msg = rpmi_sysmsi_write_msg, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, + }, + + .ops = { + .set_desc = rpmi_sysmsi_set_desc, + .msi_translate = rpmi_sysmsi_translate, + }, + + .info = { + .bus_token = DOMAIN_BUS_WIRED_TO_MSI, + .flags = MSI_FLAG_USE_DEV_FWNODE, + .handler = handle_simple_irq, + .handler_name = "simple", + }, +};
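Every RPMI service call in this driver follows the same synchronous request/response shape. A hypothetical helper reading the flag0 attribute shows the full round trip (all names are from this file; the helper itself is illustrative, not part of the patch):

static int example_query_pref_priv(struct rpmi_sysmsi_priv *priv,
				   bool *pref_priv)
{
	struct rpmi_sysmsi_get_attrs_rx rx;
	struct rpmi_mbox_message msg;
	int ret;

	/* Frame a request with an in-place response buffer */
	rpmi_mbox_init_send_with_response(&msg, RPMI_SYSMSI_SRV_GET_ATTRIBUTES,
					  NULL, 0, &rx, sizeof(rx));
	ret = rpmi_mbox_send_message(priv->chan, &msg);
	if (ret)
		return ret;

	/* RPMI status is little-endian and maps onto Linux errnos */
	if (rx.status)
		return rpmi_to_linux_error(le32_to_cpu(rx.status));

	*pref_priv = le32_to_cpu(rx.flag0) &
		     RPMI_SYSMSI_MSI_ATTRIBUTES_FLAG0_PREF_PRIV;
	return 0;
}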
+ +static int rpmi_sysmsi_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct rpmi_sysmsi_priv *priv; + struct fwnode_handle *fwnode; + u32 id; + int rc; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + priv->dev = dev; + + /* Setup mailbox client */ + priv->client.dev = priv->dev; + priv->client.rx_callback = NULL; + priv->client.tx_block = false; + priv->client.knows_txdone = true; + priv->client.tx_tout = 0; + + /* Request mailbox channel */ + priv->chan = mbox_request_channel(&priv->client, 0); + if (IS_ERR(priv->chan)) + return PTR_ERR(priv->chan); + + /* Get number of system MSIs */ + rc = rpmi_sysmsi_get_num_msi(priv); + if (rc < 1) { + mbox_free_channel(priv->chan); + if (rc) + return dev_err_probe(dev, rc, "Failed to get number of system MSIs\n"); + else + return dev_err_probe(dev, -ENODEV, "No system MSIs found\n"); + } + priv->nr_irqs = rc; + + fwnode = dev_fwnode(dev); + if (is_acpi_node(fwnode)) { + u32 nr_irqs; + + rc = riscv_acpi_get_gsi_info(fwnode, &priv->gsi_base, &id, + &nr_irqs, NULL); + if (rc) { + dev_err(dev, "failed to find GSI mapping\n"); + mbox_free_channel(priv->chan); + return rc; + } + + /* Update with actual GSI range */ + if (nr_irqs != priv->nr_irqs) + riscv_acpi_update_gsi_range(priv->gsi_base, priv->nr_irqs); + } + + /* + * The device MSI domain for platform devices on RISC-V architecture + * is only available after the MSI controller driver is probed, so + * configure it explicitly here. + */ + if (!dev_get_msi_domain(dev)) { + /* + * The device MSI domain for OF devices is only set at the + * time the OF device is populated/created. If the device MSI + * domain is only discovered after the OF device is created, + * it must be set explicitly before using any platform MSI + * functions. + */ + if (is_of_node(fwnode)) { + of_msi_configure(dev, dev_of_node(dev)); + } else if (is_acpi_device_node(fwnode)) { + struct irq_domain *msi_domain; + + msi_domain = irq_find_matching_fwnode(imsic_acpi_get_fwnode(dev), + DOMAIN_BUS_PLATFORM_MSI); + dev_set_msi_domain(dev, msi_domain); + } + + if (!dev_get_msi_domain(dev)) { + mbox_free_channel(priv->chan); + return -EPROBE_DEFER; + } + } + + if (!msi_create_device_irq_domain(dev, MSI_DEFAULT_DOMAIN, + &rpmi_sysmsi_template, + priv->nr_irqs, priv, priv)) { + mbox_free_channel(priv->chan); + return dev_err_probe(dev, -ENOMEM, "failed to create MSI irq domain\n"); + } + +#ifdef CONFIG_ACPI + struct acpi_device *adev = ACPI_COMPANION(dev); + + if (adev) + acpi_dev_clear_dependencies(adev); +#endif + + dev_info(dev, "%u system MSIs registered\n", priv->nr_irqs); + return 0; +} + +static const struct of_device_id rpmi_sysmsi_match[] = { + { .compatible = "riscv,rpmi-system-msi" }, + {} +}; + +static const struct acpi_device_id acpi_rpmi_sysmsi_match[] = { + { "RSCV0006" }, + {} +}; +MODULE_DEVICE_TABLE(acpi, acpi_rpmi_sysmsi_match); + +static struct platform_driver rpmi_sysmsi_driver = { + .driver = { + .name = "rpmi-sysmsi", + .of_match_table = rpmi_sysmsi_match, + .acpi_match_table = acpi_rpmi_sysmsi_match, + }, + .probe = rpmi_sysmsi_probe, +}; +builtin_platform_driver(rpmi_sysmsi_driver); diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index 02432d4a5ccd..c488725aa2a5 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -369,4 +369,15 @@ config BCM74110_MAILBOX processor and coprocessor that handles various power management task and more. +config RISCV_SBI_MPXY_MBOX + tristate "RISC-V SBI Message Proxy (MPXY) Mailbox" + depends on RISCV_SBI + default RISCV + help + Mailbox driver implementation for RISC-V SBI Message Proxy (MPXY) + extension. This mailbox driver is used to send messages to the + remote processor through the SBI implementation (M-mode firmware + or HS-mode hypervisor). Say Y here if you want to have this support. + If unsure say N.
+ endif diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index 98a68f838486..f9a11f5639aa 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -78,3 +78,5 @@ obj-$(CONFIG_THEAD_TH1520_MBOX) += mailbox-th1520.o obj-$(CONFIG_CIX_MBOX) += cix-mailbox.o obj-$(CONFIG_BCM74110_MAILBOX) += bcm74110-mailbox.o + +obj-$(CONFIG_RISCV_SBI_MPXY_MBOX) += riscv-sbi-mpxy-mbox.o diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 5cd8ae222073..2acc6ec229a4 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/mutex.h> #include <linux/of.h> +#include <linux/property.h> #include <linux/spinlock.h> #include "mailbox.h" @@ -383,34 +384,56 @@ EXPORT_SYMBOL_GPL(mbox_bind_client); */ struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index) { - struct device *dev = cl->dev; + struct fwnode_reference_args fwspec; + struct fwnode_handle *fwnode; struct mbox_controller *mbox; struct of_phandle_args spec; struct mbox_chan *chan; + struct device *dev; + unsigned int i; int ret; - if (!dev || !dev->of_node) { - pr_debug("%s: No owner device node\n", __func__); + dev = cl->dev; + if (!dev) { + pr_debug("No owner device\n"); return ERR_PTR(-ENODEV); } - ret = of_parse_phandle_with_args(dev->of_node, "mboxes", "#mbox-cells", - index, &spec); + fwnode = dev_fwnode(dev); + if (!fwnode) { + dev_dbg(dev, "No owner fwnode\n"); + return ERR_PTR(-ENODEV); + } + + ret = fwnode_property_get_reference_args(fwnode, "mboxes", "#mbox-cells", + 0, index, &fwspec); if (ret) { - dev_err(dev, "%s: can't parse \"mboxes\" property\n", __func__); + dev_err(dev, "%s: can't parse \"%s\" property\n", __func__, "mboxes"); return ERR_PTR(ret); } + spec.np = to_of_node(fwspec.fwnode); + spec.args_count = fwspec.nargs; + for (i = 0; i < spec.args_count; i++) + spec.args[i] = fwspec.args[i]; + scoped_guard(mutex, &con_mutex) { chan = ERR_PTR(-EPROBE_DEFER); - list_for_each_entry(mbox, &mbox_cons, node) - if (mbox->dev->of_node == spec.np) { - chan = mbox->of_xlate(mbox, &spec); - if (!IS_ERR(chan)) - break; + list_for_each_entry(mbox, &mbox_cons, node) { + if (device_match_fwnode(mbox->dev, fwspec.fwnode)) { + if (mbox->fw_xlate) { + chan = mbox->fw_xlate(mbox, &fwspec); + if (!IS_ERR(chan)) + break; + } else if (mbox->of_xlate) { + chan = mbox->of_xlate(mbox, &spec); + if (!IS_ERR(chan)) + break; + } } + } - of_node_put(spec.np); + fwnode_handle_put(fwspec.fwnode); if (IS_ERR(chan)) return chan; @@ -427,15 +450,8 @@ EXPORT_SYMBOL_GPL(mbox_request_channel); struct mbox_chan *mbox_request_channel_byname(struct mbox_client *cl, const char *name) { - struct device_node *np = cl->dev->of_node; - int index; - - if (!np) { - dev_err(cl->dev, "%s() currently only supports DT\n", __func__); - return ERR_PTR(-EINVAL); - } + int index = device_property_match_string(cl->dev, "mbox-names", name); - index = of_property_match_string(np, "mbox-names", name); if (index < 0) { dev_err(cl->dev, "%s() could not locate channel named \"%s\"\n", __func__, name); @@ -470,9 +486,8 @@ void mbox_free_channel(struct mbox_chan *chan) } EXPORT_SYMBOL_GPL(mbox_free_channel); -static struct mbox_chan * -of_mbox_index_xlate(struct mbox_controller *mbox, - const struct of_phandle_args *sp) +static struct mbox_chan *fw_mbox_index_xlate(struct mbox_controller *mbox, + const struct fwnode_reference_args *sp) { int ind = sp->args[0];
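With this change a controller can hand the core an fwnode-based translator instead of (or alongside) of_xlate. A minimal hypothetical controller-side sketch (my_fw_xlate()/my_mbox are made-up names; fw_xlate is the new mbox_controller field introduced here):

static struct mbox_chan *my_fw_xlate(struct mbox_controller *mbox,
				     const struct fwnode_reference_args *sp)
{
	/* One cell: the channel index, same contract as the default xlate */
	if (sp->nargs < 1 || sp->args[0] >= mbox->num_chans)
		return ERR_PTR(-EINVAL);

	return &mbox->chans[sp->args[0]];
}

	/* In the controller's probe: consulted before any of_xlate */
	my_mbox->fw_xlate = my_fw_xlate;
	rc = mbox_controller_register(my_mbox);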
@@ -523,8 +538,8 @@ int mbox_controller_register(struct mbox_controller *mbox) spin_lock_init(&chan->lock); } - if (!mbox->of_xlate) - mbox->of_xlate = of_mbox_index_xlate; + if (!mbox->fw_xlate && !mbox->of_xlate) + mbox->fw_xlate = fw_mbox_index_xlate; scoped_guard(mutex, &con_mutex) list_add_tail(&mbox->node, &mbox_cons); diff --git a/drivers/mailbox/riscv-sbi-mpxy-mbox.c b/drivers/mailbox/riscv-sbi-mpxy-mbox.c new file mode 100644 index 000000000000..7c9c006b7244 --- /dev/null +++ b/drivers/mailbox/riscv-sbi-mpxy-mbox.c @@ -0,0 +1,1019 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V SBI Message Proxy (MPXY) mailbox controller driver + * + * Copyright (C) 2025 Ventana Micro Systems Inc. + */ + +#include <linux/acpi.h> +#include <linux/cpu.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/irqchip/riscv-imsic.h> +#include <linux/mailbox_controller.h> +#include <linux/mailbox/riscv-rpmi-message.h> +#include <linux/minmax.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/msi.h> +#include <linux/of_irq.h> +#include <linux/percpu.h> +#include <linux/platform_device.h> +#include <linux/smp.h> +#include <linux/string.h> +#include <linux/types.h> +#include <asm/byteorder.h> +#include <asm/sbi.h> + +/* ====== SBI MPXY extension data structures ====== */ + +/* SBI MPXY MSI related channel attributes */ +struct sbi_mpxy_msi_info { + /* Lower 32-bits of the MSI target address */ + u32 msi_addr_lo; + /* Upper 32-bits of the MSI target address */ + u32 msi_addr_hi; + /* MSI data value */ + u32 msi_data; +}; + +/* + * SBI MPXY standard channel attributes. + * + * NOTE: The sequence of attribute fields is as per the + * sequence defined in the attribute table of the spec (or + * the enum sbi_mpxy_attribute_id). + */ +struct sbi_mpxy_channel_attrs { + /* Message protocol ID */ + u32 msg_proto_id; + /* Message protocol version */ + u32 msg_proto_version; + /* Message protocol maximum message length */ + u32 msg_max_len; + /* Message protocol message send timeout in microseconds */ + u32 msg_send_timeout; + /* Message protocol message completion timeout in microseconds */ + u32 msg_completion_timeout; + /* Bit array for channel capabilities */ + u32 capability; + /* SSE event ID */ + u32 sse_event_id; + /* MSI enable/disable control knob */ + u32 msi_control; + /* Channel MSI info */ + struct sbi_mpxy_msi_info msi_info; + /* Events state control */ + u32 events_state_ctrl; +}; + +/* + * RPMI specific SBI MPXY channel attributes. + * + * NOTE: The sequence of attribute fields is as per the + * sequence defined in the attribute table of the spec (or + * the enum sbi_mpxy_rpmi_attribute_id).
+ */ +struct sbi_mpxy_rpmi_channel_attrs { + /* RPMI service group ID */ + u32 servicegroup_id; + /* RPMI service group version */ + u32 servicegroup_version; + /* RPMI implementation ID */ + u32 impl_id; + /* RPMI implementation version */ + u32 impl_version; +}; + +/* SBI MPXY channel IDs data in shared memory */ +struct sbi_mpxy_channel_ids_data { + /* Remaining number of channel ids */ + __le32 remaining; + /* Returned channel ids in current function call */ + __le32 returned; + /* Returned channel id array */ + __le32 channel_array[]; +}; + +/* SBI MPXY notification data in shared memory */ +struct sbi_mpxy_notification_data { + /* Remaining number of notification events */ + __le32 remaining; + /* Number of notification events returned */ + __le32 returned; + /* Number of notification events lost */ + __le32 lost; + /* Reserved for future use */ + __le32 reserved; + /* Returned channel id array */ + u8 events_data[]; +}; + +/* ====== MPXY data structures & helper routines ====== */ + +/* MPXY Per-CPU or local context */ +struct mpxy_local { + /* Shared memory base address */ + void *shmem; + /* Shared memory physical address */ + phys_addr_t shmem_phys_addr; + /* Flag representing whether shared memory is active or not */ + bool shmem_active; +}; + +static DEFINE_PER_CPU(struct mpxy_local, mpxy_local); +static unsigned long mpxy_shmem_size; +static bool mpxy_shmem_init_done; + +static int mpxy_get_channel_count(u32 *channel_count) +{ + struct mpxy_local *mpxy = this_cpu_ptr(&mpxy_local); + struct sbi_mpxy_channel_ids_data *sdata = mpxy->shmem; + u32 remaining, returned; + struct sbiret sret; + + if (!mpxy->shmem_active) + return -ENODEV; + if (!channel_count) + return -EINVAL; + + get_cpu(); + + /* Get the remaining and returned fields to calculate total */ + sret = sbi_ecall(SBI_EXT_MPXY, SBI_EXT_MPXY_GET_CHANNEL_IDS, + 0, 0, 0, 0, 0, 0); + if (sret.error) + goto err_put_cpu; + + remaining = le32_to_cpu(sdata->remaining); + returned = le32_to_cpu(sdata->returned); + *channel_count = remaining + returned; + +err_put_cpu: + put_cpu(); + return sbi_err_map_linux_errno(sret.error); +} + +static int mpxy_get_channel_ids(u32 channel_count, u32 *channel_ids) +{ + struct mpxy_local *mpxy = this_cpu_ptr(&mpxy_local); + struct sbi_mpxy_channel_ids_data *sdata = mpxy->shmem; + u32 remaining, returned, count, start_index = 0; + struct sbiret sret; + + if (!mpxy->shmem_active) + return -ENODEV; + if (!channel_count || !channel_ids) + return -EINVAL; + + get_cpu(); + + do { + sret = sbi_ecall(SBI_EXT_MPXY, SBI_EXT_MPXY_GET_CHANNEL_IDS, + start_index, 0, 0, 0, 0, 0); + if (sret.error) + goto err_put_cpu; + + remaining = le32_to_cpu(sdata->remaining); + returned = le32_to_cpu(sdata->returned); + + count = returned < (channel_count - start_index) ? 
+ returned : (channel_count - start_index); + memcpy_from_le32(&channel_ids[start_index], sdata->channel_array, count); + start_index += count; + } while (remaining && start_index < channel_count); + +err_put_cpu: + put_cpu(); + return sbi_err_map_linux_errno(sret.error); +} + +static int mpxy_read_attrs(u32 channel_id, u32 base_attrid, u32 attr_count, + u32 *attrs_buf) +{ + struct mpxy_local *mpxy = this_cpu_ptr(&mpxy_local); + struct sbiret sret; + + if (!mpxy->shmem_active) + return -ENODEV; + if (!attr_count || !attrs_buf) + return -EINVAL; + + get_cpu(); + + sret = sbi_ecall(SBI_EXT_MPXY, SBI_EXT_MPXY_READ_ATTRS, + channel_id, base_attrid, attr_count, 0, 0, 0); + if (sret.error) + goto err_put_cpu; + + memcpy_from_le32(attrs_buf, (__le32 *)mpxy->shmem, attr_count); + +err_put_cpu: + put_cpu(); + return sbi_err_map_linux_errno(sret.error); +} + +static int mpxy_write_attrs(u32 channel_id, u32 base_attrid, u32 attr_count, + u32 *attrs_buf) +{ + struct mpxy_local *mpxy = this_cpu_ptr(&mpxy_local); + struct sbiret sret; + + if (!mpxy->shmem_active) + return -ENODEV; + if (!attr_count || !attrs_buf) + return -EINVAL; + + get_cpu(); + + memcpy_to_le32((__le32 *)mpxy->shmem, attrs_buf, attr_count); + sret = sbi_ecall(SBI_EXT_MPXY, SBI_EXT_MPXY_WRITE_ATTRS, + channel_id, base_attrid, attr_count, 0, 0, 0); + + put_cpu(); + return sbi_err_map_linux_errno(sret.error); +} + +static int mpxy_send_message_with_resp(u32 channel_id, u32 msg_id, + void *tx, unsigned long tx_len, + void *rx, unsigned long max_rx_len, + unsigned long *rx_len) +{ + struct mpxy_local *mpxy = this_cpu_ptr(&mpxy_local); + unsigned long rx_bytes; + struct sbiret sret; + + if (!mpxy->shmem_active) + return -ENODEV; + if (!tx && tx_len) + return -EINVAL; + + get_cpu(); + + /* Message protocols allowed to have no data in messages */ + if (tx_len) + memcpy(mpxy->shmem, tx, tx_len); + + sret = sbi_ecall(SBI_EXT_MPXY, SBI_EXT_MPXY_SEND_MSG_WITH_RESP, + channel_id, msg_id, tx_len, 0, 0, 0); + if (rx && !sret.error) { + rx_bytes = sret.value; + if (rx_bytes > max_rx_len) { + put_cpu(); + return -ENOSPC; + } + + memcpy(rx, mpxy->shmem, rx_bytes); + if (rx_len) + *rx_len = rx_bytes; + } + + put_cpu(); + return sbi_err_map_linux_errno(sret.error); +} + +static int mpxy_send_message_without_resp(u32 channel_id, u32 msg_id, + void *tx, unsigned long tx_len) +{ + struct mpxy_local *mpxy = this_cpu_ptr(&mpxy_local); + struct sbiret sret; + + if (!mpxy->shmem_active) + return -ENODEV; + if (!tx && tx_len) + return -EINVAL; + + get_cpu(); + + /* Message protocols allowed to have no data in messages */ + if (tx_len) + memcpy(mpxy->shmem, tx, tx_len); + + sret = sbi_ecall(SBI_EXT_MPXY, SBI_EXT_MPXY_SEND_MSG_WITHOUT_RESP, + channel_id, msg_id, tx_len, 0, 0, 0); + + put_cpu(); + return sbi_err_map_linux_errno(sret.error); +} + +static int mpxy_get_notifications(u32 channel_id, + struct sbi_mpxy_notification_data *notif_data, + unsigned long *events_data_len) +{ + struct mpxy_local *mpxy = this_cpu_ptr(&mpxy_local); + struct sbiret sret; + + if (!mpxy->shmem_active) + return -ENODEV; + if (!notif_data || !events_data_len) + return -EINVAL; + + get_cpu(); + + sret = sbi_ecall(SBI_EXT_MPXY, SBI_EXT_MPXY_GET_NOTIFICATION_EVENTS, + channel_id, 0, 0, 0, 0, 0); + if (sret.error) + goto err_put_cpu; + + memcpy(notif_data, mpxy->shmem, sret.value + 16); + *events_data_len = sret.value; + +err_put_cpu: + put_cpu(); + return sbi_err_map_linux_errno(sret.error); +} + +static int mpxy_get_shmem_size(unsigned long *shmem_size) +{ + struct sbiret sret; + + 
sret = sbi_ecall(SBI_EXT_MPXY, SBI_EXT_MPXY_GET_SHMEM_SIZE, + 0, 0, 0, 0, 0, 0); + if (sret.error) + return sbi_err_map_linux_errno(sret.error); + if (shmem_size) + *shmem_size = sret.value; + return 0; +} + +static int mpxy_setup_shmem(unsigned int cpu) +{ + struct page *shmem_page; + struct mpxy_local *mpxy; + struct sbiret sret; + + mpxy = per_cpu_ptr(&mpxy_local, cpu); + if (mpxy->shmem_active) + return 0; + + shmem_page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(mpxy_shmem_size)); + if (!shmem_page) + return -ENOMEM; + + /* + * Linux setup of shmem is done in mpxy OVERWRITE mode. + * flags[1:0] = 00b + */ + sret = sbi_ecall(SBI_EXT_MPXY, SBI_EXT_MPXY_SET_SHMEM, + page_to_phys(shmem_page), 0, 0, 0, 0, 0); + if (sret.error) { + free_pages((unsigned long)page_to_virt(shmem_page), + get_order(mpxy_shmem_size)); + return sbi_err_map_linux_errno(sret.error); + } + + mpxy->shmem = page_to_virt(shmem_page); + mpxy->shmem_phys_addr = page_to_phys(shmem_page); + mpxy->shmem_active = true; + + return 0; +} + +/* ====== MPXY mailbox data structures ====== */ + +/* MPXY mailbox channel */ +struct mpxy_mbox_channel { + struct mpxy_mbox *mbox; + u32 channel_id; + struct sbi_mpxy_channel_attrs attrs; + struct sbi_mpxy_rpmi_channel_attrs rpmi_attrs; + struct sbi_mpxy_notification_data *notif; + u32 max_xfer_len; + bool have_events_state; + u32 msi_index; + u32 msi_irq; + bool started; +}; + +/* MPXY mailbox */ +struct mpxy_mbox { + struct device *dev; + u32 channel_count; + struct mpxy_mbox_channel *channels; + u32 msi_count; + struct mpxy_mbox_channel **msi_index_to_channel; + struct mbox_controller controller; +}; + +/* ====== MPXY RPMI processing ====== */ + +static void mpxy_mbox_send_rpmi_data(struct mpxy_mbox_channel *mchan, + struct rpmi_mbox_message *msg) +{ + msg->error = 0; + switch (msg->type) { + case RPMI_MBOX_MSG_TYPE_GET_ATTRIBUTE: + switch (msg->attr.id) { + case RPMI_MBOX_ATTR_SPEC_VERSION: + msg->attr.value = mchan->attrs.msg_proto_version; + break; + case RPMI_MBOX_ATTR_MAX_MSG_DATA_SIZE: + msg->attr.value = mchan->max_xfer_len; + break; + case RPMI_MBOX_ATTR_SERVICEGROUP_ID: + msg->attr.value = mchan->rpmi_attrs.servicegroup_id; + break; + case RPMI_MBOX_ATTR_SERVICEGROUP_VERSION: + msg->attr.value = mchan->rpmi_attrs.servicegroup_version; + break; + case RPMI_MBOX_ATTR_IMPL_ID: + msg->attr.value = mchan->rpmi_attrs.impl_id; + break; + case RPMI_MBOX_ATTR_IMPL_VERSION: + msg->attr.value = mchan->rpmi_attrs.impl_version; + break; + default: + msg->error = -EOPNOTSUPP; + break; + } + break; + case RPMI_MBOX_MSG_TYPE_SET_ATTRIBUTE: + /* None of the RPMI linux mailbox attributes are writeable */ + msg->error = -EOPNOTSUPP; + break; + case RPMI_MBOX_MSG_TYPE_SEND_WITH_RESPONSE: + if ((!msg->data.request && msg->data.request_len) || + (msg->data.request && msg->data.request_len > mchan->max_xfer_len) || + (!msg->data.response && msg->data.max_response_len)) { + msg->error = -EINVAL; + break; + } + if (!(mchan->attrs.capability & SBI_MPXY_CHAN_CAP_SEND_WITH_RESP)) { + msg->error = -EIO; + break; + } + msg->error = mpxy_send_message_with_resp(mchan->channel_id, + msg->data.service_id, + msg->data.request, + msg->data.request_len, + msg->data.response, + msg->data.max_response_len, + &msg->data.out_response_len); + break; + case RPMI_MBOX_MSG_TYPE_SEND_WITHOUT_RESPONSE: + if ((!msg->data.request && msg->data.request_len) || + (msg->data.request && msg->data.request_len > mchan->max_xfer_len)) { + msg->error = -EINVAL; + break; + } + if (!(mchan->attrs.capability & 
SBI_MPXY_CHAN_CAP_SEND_WITHOUT_RESP)) { + msg->error = -EIO; + break; + } + msg->error = mpxy_send_message_without_resp(mchan->channel_id, + msg->data.service_id, + msg->data.request, + msg->data.request_len); + break; + default: + msg->error = -EOPNOTSUPP; + break; + } +} + +static void mpxy_mbox_peek_rpmi_data(struct mbox_chan *chan, + struct mpxy_mbox_channel *mchan, + struct sbi_mpxy_notification_data *notif, + unsigned long events_data_len) +{ + struct rpmi_notification_event *event; + struct rpmi_mbox_message msg; + unsigned long pos = 0; + + while (pos < events_data_len && (events_data_len - pos) <= sizeof(*event)) { + event = (struct rpmi_notification_event *)(notif->events_data + pos); + + msg.type = RPMI_MBOX_MSG_TYPE_NOTIFICATION_EVENT; + msg.notif.event_datalen = le16_to_cpu(event->event_datalen); + msg.notif.event_id = event->event_id; + msg.notif.event_data = event->event_data; + msg.error = 0; + + mbox_chan_received_data(chan, &msg); + pos += sizeof(*event) + msg.notif.event_datalen; + } +} + +static int mpxy_mbox_read_rpmi_attrs(struct mpxy_mbox_channel *mchan) +{ + return mpxy_read_attrs(mchan->channel_id, + SBI_MPXY_ATTR_MSGPROTO_ATTR_START, + sizeof(mchan->rpmi_attrs) / sizeof(u32), + (u32 *)&mchan->rpmi_attrs); +} + +/* ====== MPXY mailbox callbacks ====== */ + +static int mpxy_mbox_send_data(struct mbox_chan *chan, void *data) +{ + struct mpxy_mbox_channel *mchan = chan->con_priv; + + if (mchan->attrs.msg_proto_id == SBI_MPXY_MSGPROTO_RPMI_ID) { + mpxy_mbox_send_rpmi_data(mchan, data); + return 0; + } + + return -EOPNOTSUPP; +} + +static bool mpxy_mbox_peek_data(struct mbox_chan *chan) +{ + struct mpxy_mbox_channel *mchan = chan->con_priv; + struct sbi_mpxy_notification_data *notif = mchan->notif; + bool have_notifications = false; + unsigned long data_len; + int rc; + + if (!(mchan->attrs.capability & SBI_MPXY_CHAN_CAP_GET_NOTIFICATIONS)) + return false; + + do { + rc = mpxy_get_notifications(mchan->channel_id, notif, &data_len); + if (rc || !data_len) + break; + + if (mchan->attrs.msg_proto_id == SBI_MPXY_MSGPROTO_RPMI_ID) + mpxy_mbox_peek_rpmi_data(chan, mchan, notif, data_len); + + have_notifications = true; + } while (1); + + return have_notifications; +} + +static irqreturn_t mpxy_mbox_irq_thread(int irq, void *dev_id) +{ + mpxy_mbox_peek_data(dev_id); + return IRQ_HANDLED; +} + +static int mpxy_mbox_setup_msi(struct mbox_chan *chan, + struct mpxy_mbox_channel *mchan) +{ + struct device *dev = mchan->mbox->dev; + int rc; + + /* Do nothing if MSI not supported */ + if (mchan->msi_irq == U32_MAX) + return 0; + + /* Fail if MSI already enabled */ + if (mchan->attrs.msi_control) + return -EALREADY; + + /* Request channel MSI handler */ + rc = request_threaded_irq(mchan->msi_irq, NULL, mpxy_mbox_irq_thread, + 0, dev_name(dev), chan); + if (rc) { + dev_err(dev, "failed to request MPXY channel 0x%x IRQ\n", + mchan->channel_id); + return rc; + } + + /* Enable channel MSI control */ + mchan->attrs.msi_control = 1; + rc = mpxy_write_attrs(mchan->channel_id, SBI_MPXY_ATTR_MSI_CONTROL, + 1, &mchan->attrs.msi_control); + if (rc) { + dev_err(dev, "enable MSI control failed for MPXY channel 0x%x\n", + mchan->channel_id); + mchan->attrs.msi_control = 0; + free_irq(mchan->msi_irq, chan); + return rc; + } + + return 0; +} + +static void mpxy_mbox_cleanup_msi(struct mbox_chan *chan, + struct mpxy_mbox_channel *mchan) +{ + struct device *dev = mchan->mbox->dev; + int rc; + + /* Do nothing if MSI not supported */ + if (mchan->msi_irq == U32_MAX) + return; + + /* Do nothing if MSI 
already disabled */ + if (!mchan->attrs.msi_control) + return; + + /* Disable channel MSI control */ + mchan->attrs.msi_control = 0; + rc = mpxy_write_attrs(mchan->channel_id, SBI_MPXY_ATTR_MSI_CONTROL, + 1, &mchan->attrs.msi_control); + if (rc) { + dev_err(dev, "disable MSI control failed for MPXY channel 0x%x\n", + mchan->channel_id); + } + + /* Free channel MSI handler */ + free_irq(mchan->msi_irq, chan); +} + +static int mpxy_mbox_setup_events(struct mpxy_mbox_channel *mchan) +{ + struct device *dev = mchan->mbox->dev; + int rc; + + /* Do nothing if events state not supported */ + if (!mchan->have_events_state) + return 0; + + /* Fail if events state already enabled */ + if (mchan->attrs.events_state_ctrl) + return -EALREADY; + + /* Enable channel events state */ + mchan->attrs.events_state_ctrl = 1; + rc = mpxy_write_attrs(mchan->channel_id, SBI_MPXY_ATTR_EVENTS_STATE_CONTROL, + 1, &mchan->attrs.events_state_ctrl); + if (rc) { + dev_err(dev, "enable events state failed for MPXY channel 0x%x\n", + mchan->channel_id); + mchan->attrs.events_state_ctrl = 0; + return rc; + } + + return 0; +} + +static void mpxy_mbox_cleanup_events(struct mpxy_mbox_channel *mchan) +{ + struct device *dev = mchan->mbox->dev; + int rc; + + /* Do nothing if events state not supported */ + if (!mchan->have_events_state) + return; + + /* Do nothing if events state already disabled */ + if (!mchan->attrs.events_state_ctrl) + return; + + /* Disable channel events state */ + mchan->attrs.events_state_ctrl = 0; + rc = mpxy_write_attrs(mchan->channel_id, SBI_MPXY_ATTR_EVENTS_STATE_CONTROL, + 1, &mchan->attrs.events_state_ctrl); + if (rc) + dev_err(dev, "disable events state failed for MPXY channel 0x%x\n", + mchan->channel_id); +} + +static int mpxy_mbox_startup(struct mbox_chan *chan) +{ + struct mpxy_mbox_channel *mchan = chan->con_priv; + int rc; + + if (mchan->started) + return -EALREADY; + + /* Setup channel MSI */ + rc = mpxy_mbox_setup_msi(chan, mchan); + if (rc) + return rc; + + /* Setup channel notification events */ + rc = mpxy_mbox_setup_events(mchan); + if (rc) { + mpxy_mbox_cleanup_msi(chan, mchan); + return rc; + } + + /* Mark the channel as started */ + mchan->started = true; + + return 0; +} + +static void mpxy_mbox_shutdown(struct mbox_chan *chan) +{ + struct mpxy_mbox_channel *mchan = chan->con_priv; + + if (!mchan->started) + return; + + /* Mark the channel as stopped */ + mchan->started = false; + + /* Cleanup channel notification events */ + mpxy_mbox_cleanup_events(mchan); + + /* Cleanup channel MSI */ + mpxy_mbox_cleanup_msi(chan, mchan); +} + +static const struct mbox_chan_ops mpxy_mbox_ops = { + .send_data = mpxy_mbox_send_data, + .peek_data = mpxy_mbox_peek_data, + .startup = mpxy_mbox_startup, + .shutdown = mpxy_mbox_shutdown, +}; + +/* ====== MPXY platform driver ===== */ + +static void mpxy_mbox_msi_write(struct msi_desc *desc, struct msi_msg *msg) +{ + struct device *dev = msi_desc_to_dev(desc); + struct mpxy_mbox *mbox = dev_get_drvdata(dev); + struct mpxy_mbox_channel *mchan; + struct sbi_mpxy_msi_info *minfo; + int rc; + + mchan = mbox->msi_index_to_channel[desc->msi_index]; + if (!mchan) { + dev_warn(dev, "MPXY channel not available for MSI index %d\n", + desc->msi_index); + return; + } + + minfo = &mchan->attrs.msi_info; + minfo->msi_addr_lo = msg->address_lo; + minfo->msi_addr_hi = msg->address_hi; + minfo->msi_data = msg->data; + + rc = mpxy_write_attrs(mchan->channel_id, SBI_MPXY_ATTR_MSI_ADDR_LO, + sizeof(*minfo) / sizeof(u32), (u32 *)minfo); + if (rc) { + dev_warn(dev, "failed 
to write MSI info for MPXY channel 0x%x\n", + mchan->channel_id); + } +} + +static struct mbox_chan *mpxy_mbox_fw_xlate(struct mbox_controller *ctlr, + const struct fwnode_reference_args *pa) +{ + struct mpxy_mbox *mbox = container_of(ctlr, struct mpxy_mbox, controller); + struct mpxy_mbox_channel *mchan; + u32 i; + + if (pa->nargs != 2) + return ERR_PTR(-EINVAL); + + for (i = 0; i < mbox->channel_count; i++) { + mchan = &mbox->channels[i]; + if (mchan->channel_id == pa->args[0] && + mchan->attrs.msg_proto_id == pa->args[1]) + return &mbox->controller.chans[i]; + } + + return ERR_PTR(-ENOENT); +} + +static int mpxy_mbox_populate_channels(struct mpxy_mbox *mbox) +{ + u32 i, *channel_ids __free(kfree) = NULL; + struct mpxy_mbox_channel *mchan; + int rc; + + /* Find-out of number of channels */ + rc = mpxy_get_channel_count(&mbox->channel_count); + if (rc) + return dev_err_probe(mbox->dev, rc, "failed to get number of MPXY channels\n"); + if (!mbox->channel_count) + return dev_err_probe(mbox->dev, -ENODEV, "no MPXY channels available\n"); + + /* Allocate and fetch all channel IDs */ + channel_ids = kcalloc(mbox->channel_count, sizeof(*channel_ids), GFP_KERNEL); + if (!channel_ids) + return -ENOMEM; + rc = mpxy_get_channel_ids(mbox->channel_count, channel_ids); + if (rc) + return dev_err_probe(mbox->dev, rc, "failed to get MPXY channel IDs\n"); + + /* Populate all channels */ + mbox->channels = devm_kcalloc(mbox->dev, mbox->channel_count, + sizeof(*mbox->channels), GFP_KERNEL); + if (!mbox->channels) + return -ENOMEM; + for (i = 0; i < mbox->channel_count; i++) { + mchan = &mbox->channels[i]; + mchan->mbox = mbox; + mchan->channel_id = channel_ids[i]; + + rc = mpxy_read_attrs(mchan->channel_id, SBI_MPXY_ATTR_MSG_PROT_ID, + sizeof(mchan->attrs) / sizeof(u32), + (u32 *)&mchan->attrs); + if (rc) { + return dev_err_probe(mbox->dev, rc, + "MPXY channel 0x%x read attrs failed\n", + mchan->channel_id); + } + + if (mchan->attrs.msg_proto_id == SBI_MPXY_MSGPROTO_RPMI_ID) { + rc = mpxy_mbox_read_rpmi_attrs(mchan); + if (rc) { + return dev_err_probe(mbox->dev, rc, + "MPXY channel 0x%x read RPMI attrs failed\n", + mchan->channel_id); + } + } + + mchan->notif = devm_kzalloc(mbox->dev, mpxy_shmem_size, GFP_KERNEL); + if (!mchan->notif) + return -ENOMEM; + + mchan->max_xfer_len = min(mpxy_shmem_size, mchan->attrs.msg_max_len); + + if ((mchan->attrs.capability & SBI_MPXY_CHAN_CAP_GET_NOTIFICATIONS) && + (mchan->attrs.capability & SBI_MPXY_CHAN_CAP_EVENTS_STATE)) + mchan->have_events_state = true; + + if ((mchan->attrs.capability & SBI_MPXY_CHAN_CAP_GET_NOTIFICATIONS) && + (mchan->attrs.capability & SBI_MPXY_CHAN_CAP_MSI)) + mchan->msi_index = mbox->msi_count++; + else + mchan->msi_index = U32_MAX; + mchan->msi_irq = U32_MAX; + } + + return 0; +} + +static int mpxy_mbox_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mpxy_mbox_channel *mchan; + struct mpxy_mbox *mbox; + int msi_idx, rc; + u32 i; + + /* + * Initialize MPXY shared memory only once. This also ensures + * that SBI MPXY mailbox is probed only once. 
+ */ + if (mpxy_shmem_init_done) { + dev_err(dev, "SBI MPXY mailbox already initialized\n"); + return -EALREADY; + } + + /* Probe for SBI MPXY extension */ + if (sbi_spec_version < sbi_mk_version(1, 0) || + sbi_probe_extension(SBI_EXT_MPXY) <= 0) { + dev_info(dev, "SBI MPXY extension not available\n"); + return -ENODEV; + } + + /* Find-out shared memory size */ + rc = mpxy_get_shmem_size(&mpxy_shmem_size); + if (rc) + return dev_err_probe(dev, rc, "failed to get MPXY shared memory size\n"); + + /* + * Setup MPXY shared memory on each CPU + * + * Note: Don't cleanup MPXY shared memory upon CPU power-down + * because the RPMI System MSI irqchip driver needs it to be + * available when migrating IRQs in CPU power-down path. + */ + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "riscv/sbi-mpxy-shmem", + mpxy_setup_shmem, NULL); + + /* Mark as MPXY shared memory initialization done */ + mpxy_shmem_init_done = true; + + /* Allocate mailbox instance */ + mbox = devm_kzalloc(dev, sizeof(*mbox), GFP_KERNEL); + if (!mbox) + return -ENOMEM; + mbox->dev = dev; + platform_set_drvdata(pdev, mbox); + + /* Populate mailbox channels */ + rc = mpxy_mbox_populate_channels(mbox); + if (rc) + return rc; + + /* Initialize mailbox controller */ + mbox->controller.txdone_irq = false; + mbox->controller.txdone_poll = false; + mbox->controller.ops = &mpxy_mbox_ops; + mbox->controller.dev = dev; + mbox->controller.num_chans = mbox->channel_count; + mbox->controller.fw_xlate = mpxy_mbox_fw_xlate; + mbox->controller.chans = devm_kcalloc(dev, mbox->channel_count, + sizeof(*mbox->controller.chans), + GFP_KERNEL); + if (!mbox->controller.chans) + return -ENOMEM; + for (i = 0; i < mbox->channel_count; i++) + mbox->controller.chans[i].con_priv = &mbox->channels[i]; + + /* Setup MSIs for mailbox (if required) */ + if (mbox->msi_count) { + /* + * The device MSI domain for platform devices on RISC-V architecture + * is only available after the MSI controller driver is probed so, + * explicitly configure here. + */ + if (!dev_get_msi_domain(dev)) { + struct fwnode_handle *fwnode = dev_fwnode(dev); + + /* + * The device MSI domain for OF devices is only set at the + * time of populating/creating OF device. If the device MSI + * domain is discovered later after the OF device is created + * then we need to set it explicitly before using any platform + * MSI functions. 
+ */ + if (is_of_node(fwnode)) { + of_msi_configure(dev, dev_of_node(dev)); + } else if (is_acpi_device_node(fwnode)) { + struct irq_domain *msi_domain; + + msi_domain = irq_find_matching_fwnode(imsic_acpi_get_fwnode(dev), + DOMAIN_BUS_PLATFORM_MSI); + dev_set_msi_domain(dev, msi_domain); + } + + if (!dev_get_msi_domain(dev)) + return -EPROBE_DEFER; + } + + mbox->msi_index_to_channel = devm_kcalloc(dev, mbox->msi_count, + sizeof(*mbox->msi_index_to_channel), + GFP_KERNEL); + if (!mbox->msi_index_to_channel) + return -ENOMEM; + + for (msi_idx = 0; msi_idx < mbox->msi_count; msi_idx++) { + for (i = 0; i < mbox->channel_count; i++) { + mchan = &mbox->channels[i]; + if (mchan->msi_index == msi_idx) { + mbox->msi_index_to_channel[msi_idx] = mchan; + break; + } + } + } + + rc = platform_device_msi_init_and_alloc_irqs(dev, mbox->msi_count, + mpxy_mbox_msi_write); + if (rc) { + return dev_err_probe(dev, rc, "Failed to allocate %d MSIs\n", + mbox->msi_count); + } + + for (i = 0; i < mbox->channel_count; i++) { + mchan = &mbox->channels[i]; + if (mchan->msi_index == U32_MAX) + continue; + mchan->msi_irq = msi_get_virq(dev, mchan->msi_index); + } + } + + /* Register mailbox controller */ + rc = devm_mbox_controller_register(dev, &mbox->controller); + if (rc) { + dev_err_probe(dev, rc, "Registering SBI MPXY mailbox failed\n"); + if (mbox->msi_count) + platform_device_msi_free_irqs_all(dev); + return rc; + } + +#ifdef CONFIG_ACPI + struct acpi_device *adev = ACPI_COMPANION(dev); + + if (adev) + acpi_dev_clear_dependencies(adev); +#endif + + dev_info(dev, "mailbox registered with %d channels\n", + mbox->channel_count); + return 0; +} + +static void mpxy_mbox_remove(struct platform_device *pdev) +{ + struct mpxy_mbox *mbox = platform_get_drvdata(pdev); + + if (mbox->msi_count) + platform_device_msi_free_irqs_all(mbox->dev); +} + +static const struct of_device_id mpxy_mbox_of_match[] = { + { .compatible = "riscv,sbi-mpxy-mbox" }, + {} +}; +MODULE_DEVICE_TABLE(of, mpxy_mbox_of_match); + +static const struct acpi_device_id mpxy_mbox_acpi_match[] = { + { "RSCV0005" }, + {} +}; +MODULE_DEVICE_TABLE(acpi, mpxy_mbox_acpi_match); + +static struct platform_driver mpxy_mbox_driver = { + .driver = { + .name = "riscv-sbi-mpxy-mbox", + .of_match_table = mpxy_mbox_of_match, + .acpi_match_table = mpxy_mbox_acpi_match, + }, + .probe = mpxy_mbox_probe, + .remove = mpxy_mbox_remove, +}; +module_platform_driver(mpxy_mbox_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Anup Patel <apatel@ventanamicro.com>"); +MODULE_DESCRIPTION("RISC-V SBI MPXY mailbox controller driver"); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7da5a37917e9..a757cbcab87f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -962,7 +962,7 @@ static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) if (dma->need_sync && len) { offset = buf - (head + sizeof(*dma)); - virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, + virtqueue_map_sync_single_range_for_cpu(rq->vq, dma->addr, offset, len, DMA_FROM_DEVICE); } @@ -970,8 +970,8 @@ static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) if (dma->ref) return; - virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, - DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + virtqueue_unmap_single_attrs(rq->vq, dma->addr, dma->len, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); put_page(page); } @@ -1038,13 +1038,13 @@ static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) dma->len = alloc_frag->size - 
sizeof(*dma); - addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, - dma->len, DMA_FROM_DEVICE, 0); - if (virtqueue_dma_mapping_error(rq->vq, addr)) + addr = virtqueue_map_single_attrs(rq->vq, dma + 1, + dma->len, DMA_FROM_DEVICE, 0); + if (virtqueue_map_mapping_error(rq->vq, addr)) return NULL; dma->addr = addr; - dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); + dma->need_sync = virtqueue_map_need_sync(rq->vq, addr); /* Add a reference to dma to prevent the entire dma from * being released during error handling. This reference @@ -5942,9 +5942,9 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, if (!rq->xsk_buffs) return -ENOMEM; - hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, - DMA_TO_DEVICE, 0); - if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) { + hdr_dma = virtqueue_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, + DMA_TO_DEVICE, 0); + if (virtqueue_map_mapping_error(sq->vq, hdr_dma)) { err = -ENOMEM; goto err_free_buffs; } @@ -5973,8 +5973,8 @@ err_sq: err_rq: xsk_pool_dma_unmap(pool, 0); err_xsk_map: - virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, - DMA_TO_DEVICE, 0); + virtqueue_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, + DMA_TO_DEVICE, 0); err_free_buffs: kvfree(rq->xsk_buffs); return err; @@ -6001,8 +6001,8 @@ static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) xsk_pool_dma_unmap(pool, 0); - virtqueue_dma_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, - vi->hdr_len, DMA_TO_DEVICE, 0); + virtqueue_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr, + vi->hdr_len, DMA_TO_DEVICE, 0); kvfree(rq->xsk_buffs); return err; diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 3fc16bbab025..e255c1b069ec 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -59,10 +59,11 @@ asm volatile(ALTERNATIVE( \ #define PERF_EVENT_FLAG_USER_ACCESS BIT(SYSCTL_USER_ACCESS) #define PERF_EVENT_FLAG_LEGACY BIT(SYSCTL_LEGACY) -PMU_FORMAT_ATTR(event, "config:0-47"); +PMU_FORMAT_ATTR(event, "config:0-55"); PMU_FORMAT_ATTR(firmware, "config:62-63"); static bool sbi_v2_available; +static bool sbi_v3_available; static DEFINE_STATIC_KEY_FALSE(sbi_pmu_snapshot_available); #define sbi_pmu_snapshot_available() \ static_branch_unlikely(&sbi_pmu_snapshot_available) @@ -99,6 +100,7 @@ static unsigned int riscv_pmu_irq; /* Cache the available counters in a bitmask */ static unsigned long cmask; +static int pmu_event_find_cache(u64 config); struct sbi_pmu_event_data { union { union { @@ -298,6 +300,66 @@ static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] }, }; +static int pmu_sbi_check_event_info(void) +{ + int num_events = ARRAY_SIZE(pmu_hw_event_map) + PERF_COUNT_HW_CACHE_MAX * + PERF_COUNT_HW_CACHE_OP_MAX * PERF_COUNT_HW_CACHE_RESULT_MAX; + struct riscv_pmu_event_info *event_info_shmem; + phys_addr_t base_addr; + int i, j, k, result = 0, count = 0; + struct sbiret ret; + + event_info_shmem = kcalloc(num_events, sizeof(*event_info_shmem), GFP_KERNEL); + if (!event_info_shmem) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) + event_info_shmem[count++].event_idx = pmu_hw_event_map[i].event_idx; + + for (i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) { + for (j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) { + for (k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) + event_info_shmem[count++].event_idx = + pmu_cache_event_map[i][j][k].event_idx; + } + } + + base_addr = __pa(event_info_shmem); + if 
(IS_ENABLED(CONFIG_32BIT)) + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_EVENT_GET_INFO, lower_32_bits(base_addr), + upper_32_bits(base_addr), count, 0, 0, 0); + else + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_EVENT_GET_INFO, base_addr, 0, + count, 0, 0, 0); + if (ret.error) { + result = -EOPNOTSUPP; + goto free_mem; + } + + for (i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) { + if (!(event_info_shmem[i].output & RISCV_PMU_EVENT_INFO_OUTPUT_MASK)) + pmu_hw_event_map[i].event_idx = -ENOENT; + } + + count = ARRAY_SIZE(pmu_hw_event_map); + + for (i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) { + for (j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) { + for (k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) { + if (!(event_info_shmem[count].output & + RISCV_PMU_EVENT_INFO_OUTPUT_MASK)) + pmu_cache_event_map[i][j][k].event_idx = -ENOENT; + count++; + } + } + } + +free_mem: + kfree(event_info_shmem); + + return result; +} + static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata) { struct sbiret ret; @@ -315,6 +377,15 @@ static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata) static void pmu_sbi_check_std_events(struct work_struct *work) { + int ret; + + if (sbi_v3_available) { + ret = pmu_sbi_check_event_info(); + if (ret) + pr_err("pmu_sbi_check_event_info failed with error %d\n", ret); + return; + } + for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) pmu_sbi_check_event(&pmu_hw_event_map[i]); @@ -342,6 +413,71 @@ static bool pmu_sbi_ctr_is_fw(int cidx) return info->type == SBI_PMU_CTR_TYPE_FW; } +int riscv_pmu_get_event_info(u32 type, u64 config, u64 *econfig) +{ + int ret = -ENOENT; + + switch (type) { + case PERF_TYPE_HARDWARE: + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + ret = pmu_hw_event_map[config].event_idx; + break; + case PERF_TYPE_HW_CACHE: + ret = pmu_event_find_cache(config); + break; + case PERF_TYPE_RAW: + /* + * As per SBI v0.3 specification, + * -- the upper 16 bits must be unused for a hardware raw event. + * As per SBI v2.0 specification, + * -- the upper 8 bits must be unused for a hardware raw event. + * Bits 63:62 are used to distinguish between raw events + * 00 - Hardware raw event + * 10 - SBI firmware events + * 11 - Risc-V platform specific firmware event + */ + switch (config >> 62) { + case 0: + if (sbi_v3_available) { + /* Return error any bits [56-63] is set as it is not allowed by the spec */ + if (!(config & ~RISCV_PMU_RAW_EVENT_V2_MASK)) { + if (econfig) + *econfig = config & RISCV_PMU_RAW_EVENT_V2_MASK; + ret = RISCV_PMU_RAW_EVENT_V2_IDX; + } + /* Return error any bits [48-63] is set as it is not allowed by the spec */ + } else if (!(config & ~RISCV_PMU_RAW_EVENT_MASK)) { + if (econfig) + *econfig = config & RISCV_PMU_RAW_EVENT_MASK; + ret = RISCV_PMU_RAW_EVENT_IDX; + } + break; + case 2: + ret = (config & 0xFFFF) | (SBI_PMU_EVENT_TYPE_FW << 16); + break; + case 3: + /* + * For Risc-V platform specific firmware events + * Event code - 0xFFFF + * Event data - raw event encoding + */ + ret = SBI_PMU_EVENT_TYPE_FW << 16 | RISCV_PLAT_FW_EVENT; + if (econfig) + *econfig = config & RISCV_PMU_PLAT_FW_EVENT_MASK; + break; + default: + break; + } + break; + default: + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(riscv_pmu_get_event_info); + /* * Returns the counter width of a programmable counter and number of hardware * counters. 
As we don't support heterogeneous CPUs yet, it is okay to just @@ -507,7 +643,6 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) { u32 type = event->attr.type; u64 config = event->attr.config; - int ret = -ENOENT; /* * Ensure we are finished checking standard hardware events for @@ -515,54 +650,7 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) */ flush_work(&check_std_events_work); - switch (type) { - case PERF_TYPE_HARDWARE: - if (config >= PERF_COUNT_HW_MAX) - return -EINVAL; - ret = pmu_hw_event_map[event->attr.config].event_idx; - break; - case PERF_TYPE_HW_CACHE: - ret = pmu_event_find_cache(config); - break; - case PERF_TYPE_RAW: - /* - * As per SBI specification, the upper 16 bits must be unused - * for a hardware raw event. - * Bits 63:62 are used to distinguish between raw events - * 00 - Hardware raw event - * 10 - SBI firmware events - * 11 - Risc-V platform specific firmware event - */ - - switch (config >> 62) { - case 0: - /* Return error any bits [48-63] is set as it is not allowed by the spec */ - if (!(config & ~RISCV_PMU_RAW_EVENT_MASK)) { - *econfig = config & RISCV_PMU_RAW_EVENT_MASK; - ret = RISCV_PMU_RAW_EVENT_IDX; - } - break; - case 2: - ret = (config & 0xFFFF) | (SBI_PMU_EVENT_TYPE_FW << 16); - break; - case 3: - /* - * For Risc-V platform specific firmware events - * Event code - 0xFFFF - * Event data - raw event encoding - */ - ret = SBI_PMU_EVENT_TYPE_FW << 16 | RISCV_PLAT_FW_EVENT; - *econfig = config & RISCV_PMU_PLAT_FW_EVENT_MASK; - break; - default: - break; - } - break; - default: - break; - } - - return ret; + return riscv_pmu_get_event_info(type, config, econfig); } static void pmu_sbi_snapshot_free(struct riscv_pmu *pmu) @@ -1454,6 +1542,9 @@ static int __init pmu_sbi_devinit(void) if (sbi_spec_version >= sbi_mk_version(2, 0)) sbi_v2_available = true; + if (sbi_spec_version >= sbi_mk_version(3, 0)) + sbi_v3_available = true; + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING, "perf/riscv/pmu:starting", pmu_sbi_starting_cpu, pmu_sbi_dying_cpu); diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 559fb9d3271f..857cf288c876 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -34,13 +34,7 @@ config VDPA_SIM_BLOCK config VDPA_USER tristate "VDUSE (vDPA Device in Userspace) support" - depends on EVENTFD && MMU && HAS_DMA - # - # This driver incorrectly tries to override the dma_ops. 
It should - # never have done that, but for now keep it working on architectures - # that use dma ops - # - depends on ARCH_HAS_DMA_OPS + depends on EVENTFD && MMU select VHOST_IOTLB select IOMMU_IOVA help diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c index ad7f3447fe90..e476504db0c8 100644 --- a/drivers/vdpa/alibaba/eni_vdpa.c +++ b/drivers/vdpa/alibaba/eni_vdpa.c @@ -478,7 +478,8 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; eni_vdpa = vdpa_alloc_device(struct eni_vdpa, vdpa, - dev, &eni_vdpa_ops, 1, 1, NULL, false); + dev, &eni_vdpa_ops, NULL, + 1, 1, NULL, false); if (IS_ERR(eni_vdpa)) { ENI_ERR(pdev, "failed to allocate vDPA structure\n"); return PTR_ERR(eni_vdpa); @@ -496,7 +497,7 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); pci_set_drvdata(pdev, eni_vdpa); - eni_vdpa->vdpa.dma_dev = &pdev->dev; + eni_vdpa->vdpa.vmap.dma_dev = &pdev->dev; eni_vdpa->queues = eni_vdpa_get_num_queues(eni_vdpa); eni_vdpa->vring = devm_kcalloc(&pdev->dev, eni_vdpa->queues, diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index ccf64d7bbfaa..6658dc74d915 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -705,7 +705,8 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, vf = &ifcvf_mgmt_dev->vf; pdev = vf->pdev; adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, - &pdev->dev, &ifc_vdpa_ops, 1, 1, NULL, false); + &pdev->dev, &ifc_vdpa_ops, + NULL, 1, 1, NULL, false); if (IS_ERR(adapter)) { IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); return PTR_ERR(adapter); @@ -713,7 +714,7 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, ifcvf_mgmt_dev->adapter = adapter; adapter->pdev = pdev; - adapter->vdpa.dma_dev = &pdev->dev; + adapter->vdpa.vmap.dma_dev = &pdev->dev; adapter->vdpa.mdev = mdev; adapter->vf = vf; vdpa_dev = &adapter->vdpa; diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index c7a20278bc3c..8870a7169267 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -378,7 +378,7 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr u64 pa, offset; u64 paend; struct scatterlist *sg; - struct device *dma = mvdev->vdev.dma_dev; + struct device *dma = mvdev->vdev.vmap.dma_dev; for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) { @@ -432,7 +432,7 @@ err_map: static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) { - struct device *dma = mvdev->vdev.dma_dev; + struct device *dma = mvdev->vdev.vmap.dma_dev; destroy_direct_mr(mvdev, mr); dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 0ed2fc28e1ce..82034efb74fc 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -3395,14 +3395,17 @@ static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid) return err; } -static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) +static union virtio_map mlx5_get_vq_map(struct vdpa_device *vdev, u16 idx) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + union virtio_map map; if (is_ctrl_vq_idx(mvdev, idx)) - return &vdev->dev; + map.dma_dev = &vdev->dev; + else + map.dma_dev = 
mvdev->vdev.vmap.dma_dev; - return mvdev->vdev.dma_dev; + return map; } static void free_irqs(struct mlx5_vdpa_net *ndev) @@ -3686,7 +3689,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .set_map = mlx5_vdpa_set_map, .reset_map = mlx5_vdpa_reset_map, .set_group_asid = mlx5_set_group_asid, - .get_vq_dma_dev = mlx5_get_vq_dma_dev, + .get_vq_map = mlx5_get_vq_map, .free = mlx5_vdpa_free, .suspend = mlx5_vdpa_suspend, .resume = mlx5_vdpa_resume, /* Op disabled if not supported. */ @@ -3879,7 +3882,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, } ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops, - MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false); + NULL, MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false); if (IS_ERR(ndev)) return PTR_ERR(ndev); @@ -3965,7 +3968,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, } ndev->mvdev.mlx_features = device_features; - mvdev->vdev.dma_dev = &mdev->pdev->dev; + mvdev->vdev.vmap.dma_dev = &mdev->pdev->dev; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); if (err) goto err_alloc; diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_main.c b/drivers/vdpa/octeon_ep/octep_vdpa_main.c index 9b49efd24391..9e8d07078606 100644 --- a/drivers/vdpa/octeon_ep/octep_vdpa_main.c +++ b/drivers/vdpa/octeon_ep/octep_vdpa_main.c @@ -508,15 +508,15 @@ static int octep_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, u64 device_features; int ret; - oct_vdpa = vdpa_alloc_device(struct octep_vdpa, vdpa, &pdev->dev, &octep_vdpa_ops, 1, 1, - NULL, false); + oct_vdpa = vdpa_alloc_device(struct octep_vdpa, vdpa, &pdev->dev, &octep_vdpa_ops, + NULL, 1, 1, NULL, false); if (IS_ERR(oct_vdpa)) { dev_err(&pdev->dev, "Failed to allocate vDPA structure for octep vdpa device"); return PTR_ERR(oct_vdpa); } oct_vdpa->pdev = pdev; - oct_vdpa->vdpa.dma_dev = &pdev->dev; + oct_vdpa->vdpa.vmap.dma_dev = &pdev->dev; oct_vdpa->vdpa.mdev = mdev; oct_vdpa->oct_hw = oct_hw; vdpa_dev = &oct_vdpa->vdpa; diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 301d95e08596..36f61cc96e21 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -632,7 +632,8 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, } pdsv = vdpa_alloc_device(struct pds_vdpa_device, vdpa_dev, - dev, &pds_vdpa_ops, 1, 1, name, false); + dev, &pds_vdpa_ops, NULL, + 1, 1, name, false); if (IS_ERR(pdsv)) { dev_err(dev, "Failed to allocate vDPA structure: %pe\n", pdsv); return PTR_ERR(pdsv); @@ -643,7 +644,7 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, pdev = vdpa_aux->padev->vf_pdev; dma_dev = &pdev->dev; - pdsv->vdpa_dev.dma_dev = dma_dev; + pdsv->vdpa_dev.vmap.dma_dev = dma_dev; status = pds_vdpa_get_status(&pdsv->vdpa_dev); if (status == 0xff) { diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c index 55ec51c17ab3..4588211d57eb 100644 --- a/drivers/vdpa/solidrun/snet_main.c +++ b/drivers/vdpa/solidrun/snet_main.c @@ -1008,8 +1008,8 @@ static int snet_vdpa_probe_vf(struct pci_dev *pdev) } /* Allocate vdpa device */ - snet = vdpa_alloc_device(struct snet, vdpa, &pdev->dev, &snet_config_ops, 1, 1, NULL, - false); + snet = vdpa_alloc_device(struct snet, vdpa, &pdev->dev, &snet_config_ops, + NULL, 1, 1, NULL, false); if (!snet) { SNET_ERR(pdev, "Failed to allocate a vdpa device\n"); ret = -ENOMEM; @@ -1052,8 +1052,8 @@ static int snet_vdpa_probe_vf(struct pci_dev *pdev) */ 
snet_reserve_irq_idx(pf_irqs ? pdev_pf : pdev, snet); - /*set DMA device*/ - snet->vdpa.dma_dev = &pdev->dev; + /* set map metadata */ + snet->vdpa.vmap.dma_dev = &pdev->dev; /* Register VDPA device */ ret = vdpa_register_device(&snet->vdpa, snet->cfg->vq_num); diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 8a372b51c21a..34874beb0152 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -142,6 +142,7 @@ static void vdpa_release_dev(struct device *d) * initialized but before registered. * @parent: the parent device * @config: the bus operations that is supported by this device + * @map: the map operations that is supported by this device * @ngroups: number of groups supported by this device * @nas: number of address spaces supported by this device * @size: size of the parent structure that contains private data @@ -151,11 +152,12 @@ static void vdpa_release_dev(struct device *d) * Driver should use vdpa_alloc_device() wrapper macro instead of * using this directly. * - * Return: Returns an error when parent/config/dma_dev is not set or fail to get + * Return: Returns an error when parent/config/map is not set or fail to get * ida. */ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, + const struct virtio_map_ops *map, unsigned int ngroups, unsigned int nas, size_t size, const char *name, bool use_va) @@ -187,6 +189,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, vdev->dev.release = vdpa_release_dev; vdev->index = err; vdev->config = config; + vdev->map = map; vdev->features_valid = false; vdev->use_va = use_va; vdev->ngroups = ngroups; diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index c204fc8e471a..c1c6431950e1 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -215,7 +215,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, else ops = &vdpasim_config_ops; - vdpa = __vdpa_alloc_device(NULL, ops, + vdpa = __vdpa_alloc_device(NULL, ops, NULL, dev_attr->ngroups, dev_attr->nas, dev_attr->alloc_size, dev_attr->name, use_va); @@ -272,7 +272,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0], &vdpasim->iommu_lock); - vdpasim->vdpa.dma_dev = dev; + vdpasim->vdpa.vmap.dma_dev = dev; return vdpasim; diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c index 58116f89d8da..4352b5cf74f0 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.c +++ b/drivers/vdpa/vdpa_user/iova_domain.c @@ -103,19 +103,38 @@ void vduse_domain_clear_map(struct vduse_iova_domain *domain, static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain, u64 iova, u64 size, u64 paddr) { - struct vduse_bounce_map *map; + struct vduse_bounce_map *map, *head_map; + struct page *tmp_page; u64 last = iova + size - 1; while (iova <= last) { - map = &domain->bounce_maps[iova >> PAGE_SHIFT]; + /* + * When PAGE_SIZE is larger than 4KB, multiple adjacent bounce_maps will + * point to the same memory page of PAGE_SIZE. Since bounce_maps originate + * from IO requests, we may not be able to guarantee that the orig_phys + * values of all IO requests within the same 64KB memory page are contiguous. + * Therefore, we need to store them separately. + * + * Bounce pages are allocated on demand. 
As a result, it may occur that + * multiple bounce pages corresponding to the same 64KB memory page attempt + * to allocate memory simultaneously, so we use cmpxchg to handle this + * concurrency. + */ + map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT]; if (!map->bounce_page) { - map->bounce_page = alloc_page(GFP_ATOMIC); - if (!map->bounce_page) - return -ENOMEM; + head_map = &domain->bounce_maps[(iova & PAGE_MASK) >> BOUNCE_MAP_SHIFT]; + if (!head_map->bounce_page) { + tmp_page = alloc_page(GFP_ATOMIC); + if (!tmp_page) + return -ENOMEM; + if (cmpxchg(&head_map->bounce_page, NULL, tmp_page)) + __free_page(tmp_page); + } + map->bounce_page = head_map->bounce_page; } map->orig_phys = paddr; - paddr += PAGE_SIZE; - iova += PAGE_SIZE; + paddr += BOUNCE_MAP_SIZE; + iova += BOUNCE_MAP_SIZE; } return 0; } @@ -127,12 +146,17 @@ static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain, u64 last = iova + size - 1; while (iova <= last) { - map = &domain->bounce_maps[iova >> PAGE_SHIFT]; + map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT]; map->orig_phys = INVALID_PHYS_ADDR; - iova += PAGE_SIZE; + iova += BOUNCE_MAP_SIZE; } } +static unsigned int offset_in_bounce_page(dma_addr_t addr) +{ + return (addr & ~BOUNCE_MAP_MASK); +} + static void do_bounce(phys_addr_t orig, void *addr, size_t size, enum dma_data_direction dir) { @@ -163,7 +187,7 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain, { struct vduse_bounce_map *map; struct page *page; - unsigned int offset; + unsigned int offset, head_offset; void *addr; size_t sz; @@ -171,9 +195,10 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain, return; while (size) { - map = &domain->bounce_maps[iova >> PAGE_SHIFT]; - offset = offset_in_page(iova); - sz = min_t(size_t, PAGE_SIZE - offset, size); + map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT]; + head_offset = offset_in_page(iova); + offset = offset_in_bounce_page(iova); + sz = min_t(size_t, BOUNCE_MAP_SIZE - offset, size); if (WARN_ON(!map->bounce_page || map->orig_phys == INVALID_PHYS_ADDR)) @@ -183,7 +208,7 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain, map->user_bounce_page : map->bounce_page; addr = kmap_local_page(page); - do_bounce(map->orig_phys + offset, addr + offset, sz, dir); + do_bounce(map->orig_phys + offset, addr + head_offset, sz, dir); kunmap_local(addr); size -= sz; iova += sz; @@ -218,7 +243,7 @@ vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova) struct page *page = NULL; read_lock(&domain->bounce_lock); - map = &domain->bounce_maps[iova >> PAGE_SHIFT]; + map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT]; if (domain->user_bounce_pages || !map->bounce_page) goto out; @@ -236,7 +261,7 @@ vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain) struct vduse_bounce_map *map; unsigned long pfn, bounce_pfns; - bounce_pfns = domain->bounce_size >> PAGE_SHIFT; + bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT; for (pfn = 0; pfn < bounce_pfns; pfn++) { map = &domain->bounce_maps[pfn]; @@ -246,7 +271,8 @@ vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain) if (!map->bounce_page) continue; - __free_page(map->bounce_page); + if (!((pfn << BOUNCE_MAP_SHIFT) & ~PAGE_MASK)) + __free_page(map->bounce_page); map->bounce_page = NULL; } } @@ -254,8 +280,12 @@ vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain) int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain, struct page **pages, int count) { - struct 
vduse_bounce_map *map; - int i, ret; + struct vduse_bounce_map *map, *head_map; + int i, j, ret; + int inner_pages = PAGE_SIZE / BOUNCE_MAP_SIZE; + int bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT; + struct page *head_page = NULL; + bool need_copy; /* Now we don't support partial mapping */ if (count != (domain->bounce_size >> PAGE_SHIFT)) @@ -267,16 +297,23 @@ int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain, goto out; for (i = 0; i < count; i++) { - map = &domain->bounce_maps[i]; - if (map->bounce_page) { + need_copy = false; + head_map = &domain->bounce_maps[(i * inner_pages)]; + head_page = head_map->bounce_page; + for (j = 0; j < inner_pages; j++) { + if ((i * inner_pages + j) >= bounce_pfns) + break; + map = &domain->bounce_maps[(i * inner_pages + j)]; /* Copy kernel page to user page if it's in use */ - if (map->orig_phys != INVALID_PHYS_ADDR) - memcpy_to_page(pages[i], 0, - page_address(map->bounce_page), - PAGE_SIZE); + if ((head_page) && (map->orig_phys != INVALID_PHYS_ADDR)) + need_copy = true; + map->user_bounce_page = pages[i]; } - map->user_bounce_page = pages[i]; get_page(pages[i]); + if ((head_page) && (need_copy)) + memcpy_to_page(pages[i], 0, + page_address(head_page), + PAGE_SIZE); } domain->user_bounce_pages = true; ret = 0; @@ -288,8 +325,12 @@ out: void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain) { - struct vduse_bounce_map *map; - unsigned long i, count; + struct vduse_bounce_map *map, *head_map; + unsigned long i, j, count; + int inner_pages = PAGE_SIZE / BOUNCE_MAP_SIZE; + int bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT; + struct page *head_page = NULL; + bool need_copy; write_lock(&domain->bounce_lock); if (!domain->user_bounce_pages) @@ -297,20 +338,27 @@ void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain) count = domain->bounce_size >> PAGE_SHIFT; for (i = 0; i < count; i++) { - struct page *page = NULL; - - map = &domain->bounce_maps[i]; - if (WARN_ON(!map->user_bounce_page)) + need_copy = false; + head_map = &domain->bounce_maps[(i * inner_pages)]; + if (WARN_ON(!head_map->user_bounce_page)) continue; - - /* Copy user page to kernel page if it's in use */ - if (map->orig_phys != INVALID_PHYS_ADDR) { - page = map->bounce_page; - memcpy_from_page(page_address(page), - map->user_bounce_page, 0, PAGE_SIZE); + head_page = head_map->user_bounce_page; + + for (j = 0; j < inner_pages; j++) { + if ((i * inner_pages + j) >= bounce_pfns) + break; + map = &domain->bounce_maps[(i * inner_pages + j)]; + if (WARN_ON(!map->user_bounce_page)) + continue; + /* Copy user page to kernel page if it's in use */ + if ((map->orig_phys != INVALID_PHYS_ADDR) && (head_map->bounce_page)) + need_copy = true; + map->user_bounce_page = NULL; } - put_page(map->user_bounce_page); - map->user_bounce_page = NULL; + if (need_copy) + memcpy_from_page(page_address(head_map->bounce_page), + head_page, 0, PAGE_SIZE); + put_page(head_page); } domain->user_bounce_pages = false; out: @@ -447,7 +495,7 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain, void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain, size_t size, dma_addr_t *dma_addr, - gfp_t flag, unsigned long attrs) + gfp_t flag) { struct iova_domain *iovad = &domain->consistent_iovad; unsigned long limit = domain->iova_limit; @@ -581,7 +629,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size) unsigned long pfn, bounce_pfns; int ret; - bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT; + bounce_pfns = 
PAGE_ALIGN(bounce_size) >> BOUNCE_MAP_SHIFT; if (iova_limit <= bounce_size) return NULL; @@ -613,7 +661,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size) rwlock_init(&domain->bounce_lock); spin_lock_init(&domain->iotlb_lock); init_iova_domain(&domain->stream_iovad, - PAGE_SIZE, IOVA_START_PFN); + BOUNCE_MAP_SIZE, IOVA_START_PFN); ret = iova_domain_init_rcaches(&domain->stream_iovad); if (ret) goto err_iovad_stream; diff --git a/drivers/vdpa/vdpa_user/iova_domain.h b/drivers/vdpa/vdpa_user/iova_domain.h index 7f3f0928ec78..775cad5238f3 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.h +++ b/drivers/vdpa/vdpa_user/iova_domain.h @@ -19,6 +19,11 @@ #define INVALID_PHYS_ADDR (~(phys_addr_t)0) +#define BOUNCE_MAP_SHIFT 12 +#define BOUNCE_MAP_SIZE (1 << BOUNCE_MAP_SHIFT) +#define BOUNCE_MAP_MASK (~(BOUNCE_MAP_SIZE - 1)) +#define BOUNCE_MAP_ALIGN(addr) (((addr) + BOUNCE_MAP_SIZE - 1) & ~(BOUNCE_MAP_SIZE - 1)) + struct vduse_bounce_map { struct page *bounce_page; struct page *user_bounce_page; @@ -64,7 +69,7 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain, void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain, size_t size, dma_addr_t *dma_addr, - gfp_t flag, unsigned long attrs); + gfp_t flag); void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size, void *vaddr, dma_addr_t dma_addr, diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 04620bb77203..e7bced0b5542 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -814,59 +814,53 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = { .free = vduse_vdpa_free, }; -static void vduse_dev_sync_single_for_device(struct device *dev, +static void vduse_dev_sync_single_for_device(union virtio_map token, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; vduse_domain_sync_single_for_device(domain, dma_addr, size, dir); } -static void vduse_dev_sync_single_for_cpu(struct device *dev, +static void vduse_dev_sync_single_for_cpu(union virtio_map token, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir); } -static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page, +static dma_addr_t vduse_dev_map_page(union virtio_map token, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; return vduse_domain_map_page(domain, page, offset, size, dir, attrs); } -static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) +static void vduse_dev_unmap_page(union virtio_map token, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs); } -static void *vduse_dev_alloc_coherent(struct 
device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs) +static void *vduse_dev_alloc_coherent(union virtio_map token, size_t size, + dma_addr_t *dma_addr, gfp_t flag) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; unsigned long iova; void *addr; *dma_addr = DMA_MAPPING_ERROR; addr = vduse_domain_alloc_coherent(domain, size, - (dma_addr_t *)&iova, flag, attrs); + (dma_addr_t *)&iova, flag); if (!addr) return NULL; @@ -875,31 +869,45 @@ static void *vduse_dev_alloc_coherent(struct device *dev, size_t size, return addr; } -static void vduse_dev_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs) +static void vduse_dev_free_coherent(union virtio_map token, size_t size, + void *vaddr, dma_addr_t dma_addr, + unsigned long attrs) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs); } -static size_t vduse_dev_max_mapping_size(struct device *dev) +static bool vduse_dev_need_sync(union virtio_map token, dma_addr_t dma_addr) { - struct vduse_dev *vdev = dev_to_vduse(dev); - struct vduse_iova_domain *domain = vdev->domain; + struct vduse_iova_domain *domain = token.iova_domain; + + return dma_addr < domain->bounce_size; +} + +static int vduse_dev_mapping_error(union virtio_map token, dma_addr_t dma_addr) +{ + if (unlikely(dma_addr == DMA_MAPPING_ERROR)) + return -ENOMEM; + return 0; +} + +static size_t vduse_dev_max_mapping_size(union virtio_map token) +{ + struct vduse_iova_domain *domain = token.iova_domain; return domain->bounce_size; } -static const struct dma_map_ops vduse_dev_dma_ops = { +static const struct virtio_map_ops vduse_map_ops = { .sync_single_for_device = vduse_dev_sync_single_for_device, .sync_single_for_cpu = vduse_dev_sync_single_for_cpu, .map_page = vduse_dev_map_page, .unmap_page = vduse_dev_unmap_page, .alloc = vduse_dev_alloc_coherent, .free = vduse_dev_free_coherent, + .need_sync = vduse_dev_need_sync, + .mapping_error = vduse_dev_mapping_error, .max_mapping_size = vduse_dev_max_mapping_size, }; @@ -2003,26 +2011,18 @@ static struct vduse_mgmt_dev *vduse_mgmt; static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) { struct vduse_vdpa *vdev; - int ret; if (dev->vdev) return -EEXIST; vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev, - &vduse_vdpa_config_ops, 1, 1, name, true); + &vduse_vdpa_config_ops, &vduse_map_ops, + 1, 1, name, true); if (IS_ERR(vdev)) return PTR_ERR(vdev); dev->vdev = vdev; vdev->dev = dev; - vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask; - ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64)); - if (ret) { - put_device(&vdev->vdpa.dev); - return ret; - } - set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops); - vdev->vdpa.dma_dev = &vdev->vdpa.dev; vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev; return 0; @@ -2055,6 +2055,7 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, return -ENOMEM; } + dev->vdev->vdpa.vmap.iova_domain = dev->domain; ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num); if (ret) { put_device(&dev->vdev->vdpa.dev); diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 8787407f75b0..17a19a728c9c 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ 
b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -511,7 +511,8 @@ static int vp_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, int ret, i; vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, - dev, &vp_vdpa_ops, 1, 1, name, false); + dev, &vp_vdpa_ops, NULL, + 1, 1, name, false); if (IS_ERR(vp_vdpa)) { dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); @@ -520,7 +521,7 @@ static int vp_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, vp_vdpa_mgtdev->vp_vdpa = vp_vdpa; - vp_vdpa->vdpa.dma_dev = &pdev->dev; + vp_vdpa->vdpa.vmap.dma_dev = &pdev->dev; vp_vdpa->queues = vp_modern_get_num_queues(mdev); vp_vdpa->mdev = mdev; diff --git a/drivers/vfio/cdx/Makefile b/drivers/vfio/cdx/Makefile index df92b320122a..dadbef2419ea 100644 --- a/drivers/vfio/cdx/Makefile +++ b/drivers/vfio/cdx/Makefile @@ -5,4 +5,8 @@ obj-$(CONFIG_VFIO_CDX) += vfio-cdx.o -vfio-cdx-objs := main.o intr.o +vfio-cdx-objs := main.o + +ifdef CONFIG_GENERIC_MSI_IRQ +vfio-cdx-objs += intr.o +endif diff --git a/drivers/vfio/cdx/private.h b/drivers/vfio/cdx/private.h index dc56729b3114..172e48caa3a0 100644 --- a/drivers/vfio/cdx/private.h +++ b/drivers/vfio/cdx/private.h @@ -38,11 +38,25 @@ struct vfio_cdx_device { u8 config_msi; }; +#ifdef CONFIG_GENERIC_MSI_IRQ int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev, u32 flags, unsigned int index, unsigned int start, unsigned int count, void *data); void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev); +#else +static int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev, + u32 flags, unsigned int index, + unsigned int start, unsigned int count, + void *data) +{ + return -EINVAL; +} + +static void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev) +{ +} +#endif #endif /* VFIO_CDX_PRIVATE_H */ diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig index 7d1d690348f0..43c145d17971 100644 --- a/drivers/vfio/fsl-mc/Kconfig +++ b/drivers/vfio/fsl-mc/Kconfig @@ -2,9 +2,12 @@ menu "VFIO support for FSL_MC bus devices" depends on FSL_MC_BUS config VFIO_FSL_MC - tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices" + tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices (DEPRECATED)" select EVENTFD help + The vfio-fsl-mc driver is deprecated and will be removed in a + future kernel release. + Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc (Management Complex) devices. This is required to passthrough fsl-mc bus devices using the VFIO framework. 
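The vfio-cdx change above compiles intr.o only when CONFIG_GENERIC_MSI_IRQ is enabled and satisfies callers with header stubs otherwise. A minimal sketch of that pattern follows, using hypothetical foo_* names rather than the driver's own symbols; the stubs are written static inline here, the usual idiom for no-op fallbacks in a header.

/* foo_intr.h - sketch of a CONFIG-gated API with header stubs (hypothetical names) */
#ifndef FOO_INTR_H
#define FOO_INTR_H

#include <linux/errno.h>
#include <linux/types.h>

struct foo_device;	/* opaque here; the real driver has its own per-device type */

#ifdef CONFIG_GENERIC_MSI_IRQ
/* Real implementations live in intr.c, which is built only with MSI support. */
int foo_set_irqs(struct foo_device *fdev, u32 flags, unsigned int index,
		 unsigned int start, unsigned int count, void *data);
void foo_irqs_cleanup(struct foo_device *fdev);
#else
/* Without generic MSI support, callers get harmless fallbacks. */
static inline int foo_set_irqs(struct foo_device *fdev, u32 flags,
			       unsigned int index, unsigned int start,
			       unsigned int count, void *data)
{
	return -EINVAL;
}

static inline void foo_irqs_cleanup(struct foo_device *fdev)
{
}
#endif /* CONFIG_GENERIC_MSI_IRQ */

#endif /* FOO_INTR_H */

The corresponding Makefile rule then appends the object only under the same condition, exactly as the vfio-cdx Makefile hunk above does.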
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index f65d91c01f2e..76ccbab0e3d6 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -537,6 +537,8 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) struct device *dev = &mc_dev->dev; int ret; + dev_err_once(dev, "DEPRECATION: vfio-fsl-mc is deprecated and will be removed in a future kernel release\n"); + vdev = vfio_alloc_device(vfio_fsl_mc_device, vdev, dev, &vfio_fsl_mc_ops); if (IS_ERR(vdev)) diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 397f5e445136..fde33f54e99e 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -1612,8 +1612,10 @@ static void hisi_acc_vfio_debug_init(struct hisi_acc_vf_core_device *hisi_acc_vd } migf = kzalloc(sizeof(*migf), GFP_KERNEL); - if (!migf) + if (!migf) { + dput(vfio_dev_migration); return; + } hisi_acc_vdev->debug_migf = migf; vfio_hisi_acc = debugfs_create_dir("hisi_acc", vfio_dev_migration); @@ -1623,6 +1625,8 @@ static void hisi_acc_vfio_debug_init(struct hisi_acc_vf_core_device *hisi_acc_vd hisi_acc_vf_migf_read); debugfs_create_devm_seqfile(dev, "cmd_state", vfio_hisi_acc, hisi_acc_vf_debug_cmd); + + dput(vfio_dev_migration); } static void hisi_acc_vf_debugfs_exit(struct hisi_acc_vf_core_device *hisi_acc_vdev) diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c index d95761dcdd58..e346392b72f6 100644 --- a/drivers/vfio/pci/nvgrace-gpu/main.c +++ b/drivers/vfio/pci/nvgrace-gpu/main.c @@ -260,7 +260,7 @@ nvgrace_gpu_ioctl_get_region_info(struct vfio_device *core_vdev, info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); /* * The region memory size may not be power-of-2 aligned. - * Given that the memory as a BAR and may not be + * Given that the memory is a BAR and may not be * aligned, roundup to the next power-of-2. 
*/ info.size = memregion->bar_size; @@ -995,6 +995,8 @@ static const struct pci_device_id nvgrace_gpu_vfio_pci_table[] = { { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2348) }, /* GB200 SKU */ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2941) }, + /* GB300 SKU */ + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x31C2) }, {} }; diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index c51f5e4c3dd6..481992142f79 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -82,7 +82,7 @@ static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_region *region, host_ack_bmp = vzalloc(bytes); if (!host_ack_bmp) { - bitmap_free(host_seq_bmp); + vfree(host_seq_bmp); return -ENOMEM; } diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 123298a4dc8f..30d3e921cb0d 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -304,9 +304,14 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev, vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; + if (!vdev->pci_2_3) + irq_set_status_flags(pdev->irq, IRQ_DISABLE_UNLAZY); + ret = request_irq(pdev->irq, vfio_intx_handler, irqflags, ctx->name, ctx); if (ret) { + if (!vdev->pci_2_3) + irq_clear_status_flags(pdev->irq, IRQ_DISABLE_UNLAZY); vdev->irq_type = VFIO_PCI_NUM_IRQS; kfree(name); vfio_irq_ctx_free(vdev, ctx, 0); @@ -352,6 +357,8 @@ static void vfio_intx_disable(struct vfio_pci_core_device *vdev) vfio_virqfd_disable(&ctx->unmask); vfio_virqfd_disable(&ctx->mask); free_irq(pdev->irq, ctx); + if (!vdev->pci_2_3) + irq_clear_status_flags(pdev->irq, IRQ_DISABLE_UNLAZY); if (ctx->trigger) eventfd_ctx_put(ctx->trigger); kfree(ctx->name); @@ -677,7 +684,7 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev, { struct vfio_pci_irq_ctx *ctx; unsigned int i; - bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false; + bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX); if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) { vfio_msi_disable(vdev, msix); diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig index 88fcde51f024..c6be29b2c24b 100644 --- a/drivers/vfio/platform/Kconfig +++ b/drivers/vfio/platform/Kconfig @@ -17,10 +17,13 @@ config VFIO_PLATFORM If you don't know what to do here, say N. config VFIO_AMBA - tristate "VFIO support for AMBA devices" + tristate "VFIO support for AMBA devices (DEPRECATED)" depends on ARM_AMBA || COMPILE_TEST select VFIO_PLATFORM_BASE help + The vfio-amba driver is deprecated and will be removed in a + future kernel release. + Support for ARM AMBA devices with VFIO. This is required to make use of ARM AMBA devices present on the system using the VFIO framework. diff --git a/drivers/vfio/platform/reset/Kconfig b/drivers/vfio/platform/reset/Kconfig index dcc08dc145a5..70af0dbe293b 100644 --- a/drivers/vfio/platform/reset/Kconfig +++ b/drivers/vfio/platform/reset/Kconfig @@ -1,21 +1,21 @@ # SPDX-License-Identifier: GPL-2.0-only if VFIO_PLATFORM config VFIO_PLATFORM_CALXEDAXGMAC_RESET - tristate "VFIO support for calxeda xgmac reset" + tristate "VFIO support for calxeda xgmac reset (DEPRECATED)" help Enables the VFIO platform driver to handle reset for Calxeda xgmac If you don't know what to do here, say N. 
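The pds dirty.c fix above restores the allocator/free pairing: host_seq_bmp comes from vzalloc(), so the error path must use vfree(); bitmap_free() is the counterpart of bitmap_alloc()/bitmap_zalloc(). A small sketch of the rule, with illustrative names:

#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/gfp.h>
#include <linux/errno.h>

static int example_alloc_bitmaps(unsigned long nbits)
{
	unsigned long *seq, *ack;

	seq = vzalloc(BITS_TO_LONGS(nbits) * sizeof(long));	/* vmalloc-backed */
	if (!seq)
		return -ENOMEM;

	ack = bitmap_zalloc(nbits, GFP_KERNEL);			/* kmalloc-backed */
	if (!ack) {
		vfree(seq);		/* vzalloc() pairs with vfree() */
		return -ENOMEM;
	}

	bitmap_free(ack);		/* bitmap_zalloc() pairs with bitmap_free() */
	vfree(seq);
	return 0;
}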
config VFIO_PLATFORM_AMDXGBE_RESET - tristate "VFIO support for AMD XGBE reset" + tristate "VFIO support for AMD XGBE reset (DEPRECATED)" help Enables the VFIO platform driver to handle reset for AMD XGBE If you don't know what to do here, say N. config VFIO_PLATFORM_BCMFLEXRM_RESET - tristate "VFIO support for Broadcom FlexRM reset" + tristate "VFIO support for Broadcom FlexRM reset (DEPRECATED)" depends on ARCH_BCM_IPROC || COMPILE_TEST default ARCH_BCM_IPROC help diff --git a/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c index abdca900802d..45f386a042a9 100644 --- a/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c +++ b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c @@ -52,6 +52,8 @@ static int vfio_platform_amdxgbe_reset(struct vfio_platform_device *vdev) u32 dma_mr_value, pcs_value, value; unsigned int count; + dev_err_once(vdev->device, "DEPRECATION: VFIO AMD XGBE platform reset is deprecated and will be removed in a future kernel release\n"); + if (!xgmac_regs->ioaddr) { xgmac_regs->ioaddr = ioremap(xgmac_regs->addr, xgmac_regs->size); diff --git a/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c b/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c index 1131ebe4837d..51c9d156f307 100644 --- a/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c +++ b/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c @@ -72,6 +72,8 @@ static int vfio_platform_bcmflexrm_reset(struct vfio_platform_device *vdev) int rc = 0, ret = 0, ring_num = 0; struct vfio_platform_region *reg = &vdev->regions[0]; + dev_err_once(vdev->device, "DEPRECATION: VFIO Broadcom FlexRM platform reset is deprecated and will be removed in a future kernel release\n"); + /* Map FlexRM ring registers if not mapped */ if (!reg->ioaddr) { reg->ioaddr = ioremap(reg->addr, reg->size); diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c index 63cc7f0b2e4a..a298045a8e19 100644 --- a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c +++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c @@ -50,6 +50,8 @@ static int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev) { struct vfio_platform_region *reg = &vdev->regions[0]; + dev_err_once(vdev->device, "DEPRECATION: VFIO Calxeda xgmac platform reset is deprecated and will be removed in a future kernel release\n"); + if (!reg->ioaddr) { reg->ioaddr = ioremap(reg->addr, reg->size); diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c index ff8ff8480968..9f5c527baa8a 100644 --- a/drivers/vfio/platform/vfio_amba.c +++ b/drivers/vfio/platform/vfio_amba.c @@ -70,6 +70,8 @@ static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id) struct vfio_platform_device *vdev; int ret; + dev_err_once(&adev->dev, "DEPRECATION: vfio-amba is deprecated and will be removed in a future kernel release\n"); + vdev = vfio_alloc_device(vfio_platform_device, vdev, &adev->dev, &vfio_amba_ops); if (IS_ERR(vdev)) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 5046cae05222..38c8e9350a60 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -28,6 +28,7 @@ #include <linux/pseudo_fs.h> #include <linux/rwsem.h> #include <linux/sched.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/string.h> @@ -1251,7 +1252,7 @@ static int vfio_ioctl_device_feature(struct vfio_device *device, feature.argsz - minsz); 
default: if (unlikely(!device->ops->device_feature)) - return -EINVAL; + return -ENOTTY; return device->ops->device_feature(device, feature.flags, arg->data, feature.argsz - minsz); @@ -1355,6 +1356,22 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) return device->ops->mmap(device, vma); } +#ifdef CONFIG_PROC_FS +static void vfio_device_show_fdinfo(struct seq_file *m, struct file *filep) +{ + char *path; + struct vfio_device_file *df = filep->private_data; + struct vfio_device *device = df->device; + + path = kobject_get_path(&device->dev->kobj, GFP_KERNEL); + if (!path) + return; + + seq_printf(m, "vfio-device-syspath: /sys%s\n", path); + kfree(path); +} +#endif + const struct file_operations vfio_device_fops = { .owner = THIS_MODULE, .open = vfio_device_fops_cdev_open, @@ -1364,6 +1381,9 @@ const struct file_operations vfio_device_fops = { .unlocked_ioctl = vfio_device_fops_unl_ioctl, .compat_ioctl = compat_ptr_ioctl, .mmap = vfio_device_fops_mmap, +#ifdef CONFIG_PROC_FS + .show_fdinfo = vfio_device_show_fdinfo, +#endif }; static struct vfio_device *vfio_device_from_file(struct file *file) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index af1e1fdfd9ed..05a481e4c385 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1318,7 +1318,8 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; - struct device *dma_dev = vdpa_get_dma_dev(vdpa); + union virtio_map map = vdpa_get_map(vdpa); + struct device *dma_dev = map.dma_dev; int ret; /* Device want to do DMA by itself */ @@ -1353,7 +1354,8 @@ err_attach: static void vhost_vdpa_free_domain(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; - struct device *dma_dev = vdpa_get_dma_dev(vdpa); + union virtio_map map = vdpa_get_map(vdpa); + struct device *dma_dev = map.dma_dev; if (v->domain) { iommu_detach_device(v->domain, dma_dev); diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 1778eff7ab00..925858cc6096 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -1115,6 +1115,7 @@ static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, struct iov_iter iter; u64 translated; int ret; + size_t size; ret = iotlb_translate(vrh, (u64)(uintptr_t)src, len - total_translated, &translated, @@ -1132,9 +1133,9 @@ static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, translated); } - ret = copy_from_iter(dst, translated, &iter); - if (ret < 0) - return ret; + size = copy_from_iter(dst, translated, &iter); + if (size != translated) + return -EFAULT; src += translated; dst += translated; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 7f3fd72678eb..1b93d8c64361 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -205,7 +205,7 @@ static int virtballoon_free_page_report(struct page_reporting_dev_info *pr_dev_i unsigned int unused, err; /* We should always be able to add these buffers to an empty queue. */ - err = virtqueue_add_inbuf(vq, sg, nents, vb, GFP_NOWAIT | __GFP_NOWARN); + err = virtqueue_add_inbuf(vq, sg, nents, vb, GFP_NOWAIT); /* * In the extremely unlikely case that something has occurred and we diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index c147145a6593..7b6205253b46 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -166,7 +166,7 @@ struct vring_virtqueue { bool packed_ring; /* Is DMA API used? 
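The vringh change above corrects a return-value check: copy_from_iter() returns the number of bytes copied as a size_t and never a negative errno, so the old "ret < 0" test could not fire. The reliable check is to compare against the requested length and treat a short copy as a fault, for example:

#include <linux/uio.h>
#include <linux/errno.h>

static int example_copy(void *dst, size_t len, struct iov_iter *iter)
{
	size_t copied = copy_from_iter(dst, len, iter);	/* bytes copied, never < 0 */

	if (copied != len)
		return -EFAULT;		/* short copy: report a fault */

	return 0;
}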
*/ - bool use_dma_api; + bool use_map_api; /* Can we use weak barriers? */ bool weak_barriers; @@ -210,8 +210,7 @@ struct vring_virtqueue { /* DMA, allocation, and size information */ bool we_own_ring; - /* Device used for doing DMA */ - struct device *dma_dev; + union virtio_map map; #ifdef DEBUG /* They're supposed to lock for us. */ @@ -268,7 +267,7 @@ static bool virtqueue_use_indirect(const struct vring_virtqueue *vq, * unconditionally on data path. */ -static bool vring_use_dma_api(const struct virtio_device *vdev) +static bool vring_use_map_api(const struct virtio_device *vdev) { if (!virtio_has_dma_quirk(vdev)) return true; @@ -291,33 +290,39 @@ static bool vring_use_dma_api(const struct virtio_device *vdev) static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring, const struct vring_desc_extra *extra) { - return vring->use_dma_api && (extra->addr != DMA_MAPPING_ERROR); + return vring->use_map_api && (extra->addr != DMA_MAPPING_ERROR); } size_t virtio_max_dma_size(const struct virtio_device *vdev) { size_t max_segment_size = SIZE_MAX; - if (vring_use_dma_api(vdev)) - max_segment_size = dma_max_mapping_size(vdev->dev.parent); + if (vring_use_map_api(vdev)) { + if (vdev->map) { + max_segment_size = + vdev->map->max_mapping_size(vdev->vmap); + } else + max_segment_size = + dma_max_mapping_size(vdev->dev.parent); + } return max_segment_size; } EXPORT_SYMBOL_GPL(virtio_max_dma_size); static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - struct device *dma_dev) + dma_addr_t *map_handle, gfp_t flag, + union virtio_map map) { - if (vring_use_dma_api(vdev)) { - return dma_alloc_coherent(dma_dev, size, - dma_handle, flag); + if (vring_use_map_api(vdev)) { + return virtqueue_map_alloc_coherent(vdev, map, size, + map_handle, flag); } else { void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); if (queue) { phys_addr_t phys_addr = virt_to_phys(queue); - *dma_handle = (dma_addr_t)phys_addr; + *map_handle = (dma_addr_t)phys_addr; /* * Sanity check: make sure we dind't truncate @@ -330,7 +335,7 @@ static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, * warning and abort if we end up with an * unrepresentable address. */ - if (WARN_ON_ONCE(*dma_handle != phys_addr)) { + if (WARN_ON_ONCE(*map_handle != phys_addr)) { free_pages_exact(queue, PAGE_ALIGN(size)); return NULL; } @@ -340,11 +345,12 @@ static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, } static void vring_free_queue(struct virtio_device *vdev, size_t size, - void *queue, dma_addr_t dma_handle, - struct device *dma_dev) + void *queue, dma_addr_t map_handle, + union virtio_map map) { - if (vring_use_dma_api(vdev)) - dma_free_coherent(dma_dev, size, queue, dma_handle); + if (vring_use_map_api(vdev)) + virtqueue_map_free_coherent(vdev, map, size, + queue, map_handle); else free_pages_exact(queue, PAGE_ALIGN(size)); } @@ -356,7 +362,21 @@ static void vring_free_queue(struct virtio_device *vdev, size_t size, */ static struct device *vring_dma_dev(const struct vring_virtqueue *vq) { - return vq->dma_dev; + return vq->map.dma_dev; +} + +static int vring_mapping_error(const struct vring_virtqueue *vq, + dma_addr_t addr) +{ + struct virtio_device *vdev = vq->vq.vdev; + + if (!vq->use_map_api) + return 0; + + if (vdev->map) + return vdev->map->mapping_error(vq->map, addr); + else + return dma_mapping_error(vring_dma_dev(vq), addr); } /* Map one sg entry. 
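With the change above, virtio_max_dma_size() dispatches to the transport's map ops when vdev->map is set and falls back to dma_max_mapping_size() otherwise. Driver-side usage is unchanged; a hypothetical caller clamping its segment size might look like this:

#include <linux/virtio.h>
#include <linux/minmax.h>

static unsigned int example_seg_limit(struct virtio_device *vdev,
				      unsigned int wanted)
{
	size_t max = virtio_max_dma_size(vdev);	/* map ops or DMA API, whichever applies */

	return min_t(unsigned int, wanted, max);
}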
*/ @@ -372,7 +392,7 @@ static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *len = sg->length; - if (!vq->use_dma_api) { + if (!vq->use_map_api) { /* * If DMA is not used, KMSAN doesn't know that the scatterlist * is initialized by the hardware. Explicitly check/unpoison it @@ -388,11 +408,11 @@ static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist * the way it expects (we don't guarantee that the scatterlist * will exist for the lifetime of the mapping). */ - *addr = dma_map_page(vring_dma_dev(vq), - sg_page(sg), sg->offset, sg->length, - direction); + *addr = virtqueue_map_page_attrs(&vq->vq, sg_page(sg), + sg->offset, sg->length, + direction, 0); - if (dma_mapping_error(vring_dma_dev(vq), *addr)) + if (vring_mapping_error(vq, *addr)) return -ENOMEM; return 0; @@ -402,20 +422,11 @@ static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, void *cpu_addr, size_t size, enum dma_data_direction direction) { - if (!vq->use_dma_api) + if (!vq->use_map_api) return (dma_addr_t)virt_to_phys(cpu_addr); - return dma_map_single(vring_dma_dev(vq), - cpu_addr, size, direction); -} - -static int vring_mapping_error(const struct vring_virtqueue *vq, - dma_addr_t addr) -{ - if (!vq->use_dma_api) - return 0; - - return dma_mapping_error(vring_dma_dev(vq), addr); + return virtqueue_map_single_attrs(&vq->vq, cpu_addr, + size, direction, 0); } static void virtqueue_init(struct vring_virtqueue *vq, u32 num) @@ -449,24 +460,17 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, flags = extra->flags; if (flags & VRING_DESC_F_INDIRECT) { - if (!vq->use_dma_api) - goto out; - - dma_unmap_single(vring_dma_dev(vq), - extra->addr, - extra->len, - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } else { - if (!vring_need_unmap_buffer(vq, extra)) + if (!vq->use_map_api) goto out; + } else if (!vring_need_unmap_buffer(vq, extra)) + goto out; - dma_unmap_page(vring_dma_dev(vq), - extra->addr, - extra->len, - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } + virtqueue_unmap_page_attrs(&vq->vq, + extra->addr, + extra->len, + (flags & VRING_DESC_F_WRITE) ? 
+ DMA_FROM_DEVICE : DMA_TO_DEVICE, + 0); out: return extra->next; @@ -790,7 +794,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, extra = (struct vring_desc_extra *)&indir_desc[num]; - if (vq->use_dma_api) { + if (vq->use_map_api) { for (j = 0; j < num; j++) vring_unmap_one_split(vq, &extra[j]); } @@ -1064,12 +1068,13 @@ err_state: } static void vring_free_split(struct vring_virtqueue_split *vring_split, - struct virtio_device *vdev, struct device *dma_dev) + struct virtio_device *vdev, + union virtio_map map) { vring_free_queue(vdev, vring_split->queue_size_in_bytes, vring_split->vring.desc, vring_split->queue_dma_addr, - dma_dev); + map); kfree(vring_split->desc_state); kfree(vring_split->desc_extra); @@ -1080,7 +1085,7 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, u32 num, unsigned int vring_align, bool may_reduce_num, - struct device *dma_dev) + union virtio_map map) { void *queue = NULL; dma_addr_t dma_addr; @@ -1096,7 +1101,7 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, - dma_dev); + map); if (queue) break; if (!may_reduce_num) @@ -1110,7 +1115,7 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, /* Try to get a single page. You are my only hope! */ queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr, GFP_KERNEL | __GFP_ZERO, - dma_dev); + map); } if (!queue) return -ENOMEM; @@ -1134,7 +1139,7 @@ static struct virtqueue *__vring_new_virtqueue_split(unsigned int index, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, - struct device *dma_dev) + union virtio_map map) { struct vring_virtqueue *vq; int err; @@ -1157,8 +1162,8 @@ static struct virtqueue *__vring_new_virtqueue_split(unsigned int index, #else vq->broken = false; #endif - vq->dma_dev = dma_dev; - vq->use_dma_api = vring_use_dma_api(vdev); + vq->map = map; + vq->use_map_api = vring_use_map_api(vdev); vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -1195,21 +1200,21 @@ static struct virtqueue *vring_create_virtqueue_split( bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, - struct device *dma_dev) + union virtio_map map) { struct vring_virtqueue_split vring_split = {}; struct virtqueue *vq; int err; err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align, - may_reduce_num, dma_dev); + may_reduce_num, map); if (err) return NULL; vq = __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers, - context, notify, callback, name, dma_dev); + context, notify, callback, name, map); if (!vq) { - vring_free_split(&vring_split, vdev, dma_dev); + vring_free_split(&vring_split, vdev, map); return NULL; } @@ -1228,7 +1233,7 @@ static int virtqueue_resize_split(struct virtqueue *_vq, u32 num) err = vring_alloc_queue_split(&vring_split, vdev, num, vq->split.vring_align, vq->split.may_reduce_num, - vring_dma_dev(vq)); + vq->map); if (err) goto err; @@ -1246,7 +1251,7 @@ static int virtqueue_resize_split(struct virtqueue *_vq, u32 num) return 0; err_state_extra: - vring_free_split(&vring_split, vdev, vring_dma_dev(vq)); + vring_free_split(&vring_split, vdev, vq->map); err: virtqueue_reinit_split(vq); return -ENOMEM; @@ -1274,22 +1279,16 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, flags = extra->flags; if (flags 
& VRING_DESC_F_INDIRECT) { - if (!vq->use_dma_api) - return; - - dma_unmap_single(vring_dma_dev(vq), - extra->addr, extra->len, - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } else { - if (!vring_need_unmap_buffer(vq, extra)) + if (!vq->use_map_api) return; + } else if (!vring_need_unmap_buffer(vq, extra)) + return; - dma_unmap_page(vring_dma_dev(vq), - extra->addr, extra->len, - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } + virtqueue_unmap_page_attrs(&vq->vq, + extra->addr, extra->len, + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE, + 0); } static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, @@ -1366,7 +1365,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, desc[i].addr = cpu_to_le64(addr); desc[i].len = cpu_to_le32(len); - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->use_map_api)) { extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr; extra[i].len = len; extra[i].flags = n < out_sgs ? 0 : VRING_DESC_F_WRITE; @@ -1388,7 +1387,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, sizeof(struct vring_packed_desc)); vq->packed.vring.desc[head].id = cpu_to_le16(id); - if (vq->use_dma_api) { + if (vq->use_map_api) { vq->packed.desc_extra[id].addr = addr; vq->packed.desc_extra[id].len = total_sg * sizeof(struct vring_packed_desc); @@ -1530,7 +1529,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, desc[i].len = cpu_to_le32(len); desc[i].id = cpu_to_le16(id); - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->use_map_api)) { vq->packed.desc_extra[curr].addr = premapped ? DMA_MAPPING_ERROR : addr; vq->packed.desc_extra[curr].len = len; @@ -1665,7 +1664,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, vq->free_head = id; vq->vq.num_free += state->num; - if (unlikely(vq->use_dma_api)) { + if (unlikely(vq->use_map_api)) { curr = id; for (i = 0; i < state->num; i++) { vring_unmap_extra_packed(vq, @@ -1683,7 +1682,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, if (!desc) return; - if (vq->use_dma_api) { + if (vq->use_map_api) { len = vq->packed.desc_extra[id].len; num = len / sizeof(struct vring_packed_desc); @@ -1962,25 +1961,25 @@ static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num) static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, struct virtio_device *vdev, - struct device *dma_dev) + union virtio_map map) { if (vring_packed->vring.desc) vring_free_queue(vdev, vring_packed->ring_size_in_bytes, vring_packed->vring.desc, vring_packed->ring_dma_addr, - dma_dev); + map); if (vring_packed->vring.driver) vring_free_queue(vdev, vring_packed->event_size_in_bytes, vring_packed->vring.driver, vring_packed->driver_event_dma_addr, - dma_dev); + map); if (vring_packed->vring.device) vring_free_queue(vdev, vring_packed->event_size_in_bytes, vring_packed->vring.device, vring_packed->device_event_dma_addr, - dma_dev); + map); kfree(vring_packed->desc_state); kfree(vring_packed->desc_extra); @@ -1988,7 +1987,7 @@ static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, struct virtio_device *vdev, - u32 num, struct device *dma_dev) + u32 num, union virtio_map map) { struct vring_packed_desc *ring; struct vring_packed_desc_event *driver, *device; @@ -2000,7 +1999,7 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, ring = vring_alloc_queue(vdev, 
ring_size_in_bytes, &ring_dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, - dma_dev); + map); if (!ring) goto err; @@ -2013,7 +2012,7 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, driver = vring_alloc_queue(vdev, event_size_in_bytes, &driver_event_dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, - dma_dev); + map); if (!driver) goto err; @@ -2024,7 +2023,7 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, device = vring_alloc_queue(vdev, event_size_in_bytes, &device_event_dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, - dma_dev); + map); if (!device) goto err; @@ -2036,7 +2035,7 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, return 0; err: - vring_free_packed(vring_packed, vdev, dma_dev); + vring_free_packed(vring_packed, vdev, map); return -ENOMEM; } @@ -2112,7 +2111,7 @@ static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, - struct device *dma_dev) + union virtio_map map) { struct vring_virtqueue *vq; int err; @@ -2135,8 +2134,8 @@ static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index, vq->broken = false; #endif vq->packed_ring = true; - vq->dma_dev = dma_dev; - vq->use_dma_api = vring_use_dma_api(vdev); + vq->map = map; + vq->use_map_api = vring_use_map_api(vdev); vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -2173,18 +2172,18 @@ static struct virtqueue *vring_create_virtqueue_packed( bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, - struct device *dma_dev) + union virtio_map map) { struct vring_virtqueue_packed vring_packed = {}; struct virtqueue *vq; - if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev)) + if (vring_alloc_queue_packed(&vring_packed, vdev, num, map)) return NULL; vq = __vring_new_virtqueue_packed(index, &vring_packed, vdev, weak_barriers, - context, notify, callback, name, dma_dev); + context, notify, callback, name, map); if (!vq) { - vring_free_packed(&vring_packed, vdev, dma_dev); + vring_free_packed(&vring_packed, vdev, map); return NULL; } @@ -2200,7 +2199,7 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num) struct virtio_device *vdev = _vq->vdev; int err; - if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq))) + if (vring_alloc_queue_packed(&vring_packed, vdev, num, vq->map)) goto err_ring; err = vring_alloc_state_extra_packed(&vring_packed); @@ -2217,7 +2216,7 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num) return 0; err_state_extra: - vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq)); + vring_free_packed(&vring_packed, vdev, vq->map); err_ring: virtqueue_reinit_packed(vq); return -ENOMEM; @@ -2448,8 +2447,8 @@ struct device *virtqueue_dma_dev(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - if (vq->use_dma_api) - return vring_dma_dev(vq); + if (vq->use_map_api && !_vq->vdev->map) + return vq->map.dma_dev; else return NULL; } @@ -2734,19 +2733,20 @@ struct virtqueue *vring_create_virtqueue( void (*callback)(struct virtqueue *), const char *name) { + union virtio_map map = {.dma_dev = vdev->dev.parent}; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return vring_create_virtqueue_packed(index, num, vring_align, vdev, weak_barriers, may_reduce_num, - context, notify, callback, name, vdev->dev.parent); + context, notify, callback, name, 
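Note the behavioural change in virtqueue_dma_dev() above: it now returns NULL not only when the DMA API is bypassed but also when the transport supplies its own map ops, so callers that want to use the DMA API directly must check for NULL and otherwise go through the virtqueue_map_*() wrappers. A hypothetical caller:

#include <linux/virtio.h>
#include <linux/dma-mapping.h>

static dma_addr_t example_map(struct virtqueue *vq, void *buf, size_t len)
{
	struct device *dma_dev = virtqueue_dma_dev(vq);

	if (!dma_dev)		/* custom map ops: the DMA API cannot be used here */
		return DMA_MAPPING_ERROR;

	return dma_map_single(dma_dev, buf, len, DMA_TO_DEVICE);
}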
map); return vring_create_virtqueue_split(index, num, vring_align, vdev, weak_barriers, may_reduce_num, - context, notify, callback, name, vdev->dev.parent); + context, notify, callback, name, map); } EXPORT_SYMBOL_GPL(vring_create_virtqueue); -struct virtqueue *vring_create_virtqueue_dma( +struct virtqueue *vring_create_virtqueue_map( unsigned int index, unsigned int num, unsigned int vring_align, @@ -2757,19 +2757,19 @@ struct virtqueue *vring_create_virtqueue_dma( bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, - struct device *dma_dev) + union virtio_map map) { if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return vring_create_virtqueue_packed(index, num, vring_align, vdev, weak_barriers, may_reduce_num, - context, notify, callback, name, dma_dev); + context, notify, callback, name, map); return vring_create_virtqueue_split(index, num, vring_align, vdev, weak_barriers, may_reduce_num, - context, notify, callback, name, dma_dev); + context, notify, callback, name, map); } -EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); +EXPORT_SYMBOL_GPL(vring_create_virtqueue_map); /** * virtqueue_resize - resize the vring of vq @@ -2880,6 +2880,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, const char *name) { struct vring_virtqueue_split vring_split = {}; + union virtio_map map = {.dma_dev = vdev->dev.parent}; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) { struct vring_virtqueue_packed vring_packed = {}; @@ -2889,13 +2890,13 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, return __vring_new_virtqueue_packed(index, &vring_packed, vdev, weak_barriers, context, notify, callback, - name, vdev->dev.parent); + name, map); } vring_init(&vring_split.vring, num, pages, vring_align); return __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers, context, notify, callback, name, - vdev->dev.parent); + map); } EXPORT_SYMBOL_GPL(vring_new_virtqueue); @@ -2909,19 +2910,19 @@ static void vring_free(struct virtqueue *_vq) vq->packed.ring_size_in_bytes, vq->packed.vring.desc, vq->packed.ring_dma_addr, - vring_dma_dev(vq)); + vq->map); vring_free_queue(vq->vq.vdev, vq->packed.event_size_in_bytes, vq->packed.vring.driver, vq->packed.driver_event_dma_addr, - vring_dma_dev(vq)); + vq->map); vring_free_queue(vq->vq.vdev, vq->packed.event_size_in_bytes, vq->packed.vring.device, vq->packed.device_event_dma_addr, - vring_dma_dev(vq)); + vq->map); kfree(vq->packed.desc_state); kfree(vq->packed.desc_extra); @@ -2930,7 +2931,7 @@ static void vring_free(struct virtqueue *_vq) vq->split.queue_size_in_bytes, vq->split.vring.desc, vq->split.queue_dma_addr, - vring_dma_dev(vq)); + vq->map); } } if (!vq->packed_ring) { @@ -3137,7 +3138,108 @@ const struct vring *virtqueue_get_vring(const struct virtqueue *vq) EXPORT_SYMBOL_GPL(virtqueue_get_vring); /** - * virtqueue_dma_map_single_attrs - map DMA for _vq + * virtqueue_map_alloc_coherent - alloc coherent mapping + * @vdev: the virtio device we are talking to + * @map: metadata for performing mapping + * @size: the size of the buffer + * @map_handle: the pointer to the mapped address + * @gfp: allocation flag (GFP_XXX) + * + * return virtual address or NULL on error + */ +void *virtqueue_map_alloc_coherent(struct virtio_device *vdev, + union virtio_map map, + size_t size, dma_addr_t *map_handle, + gfp_t gfp) +{ + if (vdev->map) + return vdev->map->alloc(map, size, + map_handle, gfp); + else + return dma_alloc_coherent(map.dma_dev, size, + map_handle, gfp); +} 
+EXPORT_SYMBOL_GPL(virtqueue_map_alloc_coherent); + +/** + * virtqueue_map_free_coherent - free coherent mapping + * @vdev: the virtio device we are talking to + * @map: metadata for performing mapping + * @size: the size of the buffer + * @map_handle: the mapped address that needs to be freed + * + */ +void virtqueue_map_free_coherent(struct virtio_device *vdev, + union virtio_map map, size_t size, void *vaddr, + dma_addr_t map_handle) +{ + if (vdev->map) + vdev->map->free(map, size, vaddr, + map_handle, 0); + else + dma_free_coherent(map.dma_dev, size, vaddr, map_handle); +} +EXPORT_SYMBOL_GPL(virtqueue_map_free_coherent); + +/** + * virtqueue_map_page_attrs - map a page to the device + * @_vq: the virtqueue we are talking to + * @page: the page that will be mapped by the device + * @offset: the offset in the page for a buffer + * @size: the buffer size + * @dir: mapping direction + * @attrs: mapping attributes + * + * Returns mapped address. Caller should check that by virtqueue_mapping_error(). + */ +dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; + + if (vdev->map) + return vdev->map->map_page(vq->map, + page, offset, size, + dir, attrs); + + return dma_map_page_attrs(vring_dma_dev(vq), + page, offset, size, + dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_map_page_attrs); + +/** + * virtqueue_unmap_page_attrs - map a page to the device + * @_vq: the virtqueue we are talking to + * @map_handle: the mapped address + * @size: the buffer size + * @dir: mapping direction + * @attrs: unmapping attributes + */ +void virtqueue_unmap_page_attrs(const struct virtqueue *_vq, + dma_addr_t map_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; + + if (vdev->map) + vdev->map->unmap_page(vq->map, + map_handle, size, dir, attrs); + else + dma_unmap_page_attrs(vring_dma_dev(vq), map_handle, + size, dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_unmap_page_attrs); + +/** + * virtqueue_map_single_attrs - map DMA for _vq * @_vq: the struct virtqueue we're talking about. * @ptr: the pointer of the buffer to do dma * @size: the size of the buffer to do dma @@ -3147,139 +3249,158 @@ EXPORT_SYMBOL_GPL(virtqueue_get_vring); * The caller calls this to do dma mapping in advance. The DMA address can be * passed to this _vq when it is in pre-mapped mode. * - * return DMA address. Caller should check that by virtqueue_dma_mapping_error(). + * return mapped address. Caller should check that by virtqueue_mapping_error(). */ -dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, - size_t size, - enum dma_data_direction dir, - unsigned long attrs) +dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, + size_t size, + enum dma_data_direction dir, + unsigned long attrs) { - struct vring_virtqueue *vq = to_vvq(_vq); + const struct vring_virtqueue *vq = to_vvq(_vq); - if (!vq->use_dma_api) { + if (!vq->use_map_api) { kmsan_handle_dma(virt_to_phys(ptr), size, dir); return (dma_addr_t)virt_to_phys(ptr); } - return dma_map_single_attrs(vring_dma_dev(vq), ptr, size, dir, attrs); + /* DMA must never operate on areas that might be remapped. 
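A minimal usage sketch for the coherent helpers introduced above, assuming the caller already holds a union virtio_map (here built from the device's parent, as vring_create_virtqueue() does later in this patch); the wrapper names are illustrative:

#include <linux/virtio.h>
#include <linux/gfp.h>

static void *example_alloc_coherent(struct virtio_device *vdev, size_t size,
				    dma_addr_t *handle)
{
	union virtio_map map = { .dma_dev = vdev->dev.parent };

	return virtqueue_map_alloc_coherent(vdev, map, size, handle, GFP_KERNEL);
}

static void example_free_coherent(struct virtio_device *vdev, size_t size,
				  void *vaddr, dma_addr_t handle)
{
	union virtio_map map = { .dma_dev = vdev->dev.parent };

	virtqueue_map_free_coherent(vdev, map, size, vaddr, handle);
}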
*/ + if (dev_WARN_ONCE(&_vq->vdev->dev, is_vmalloc_addr(ptr), + "rejecting DMA map of vmalloc memory\n")) + return DMA_MAPPING_ERROR; + + return virtqueue_map_page_attrs(&vq->vq, virt_to_page(ptr), + offset_in_page(ptr), size, dir, attrs); } -EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs); +EXPORT_SYMBOL_GPL(virtqueue_map_single_attrs); /** - * virtqueue_dma_unmap_single_attrs - unmap DMA for _vq + * virtqueue_unmap_single_attrs - unmap map for _vq * @_vq: the struct virtqueue we're talking about. * @addr: the dma address to unmap * @size: the size of the buffer * @dir: DMA direction * @attrs: DMA Attrs * - * Unmap the address that is mapped by the virtqueue_dma_map_* APIs. + * Unmap the address that is mapped by the virtqueue_map_* APIs. * */ -void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) +void virtqueue_unmap_single_attrs(const struct virtqueue *_vq, + dma_addr_t addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) { - struct vring_virtqueue *vq = to_vvq(_vq); + const struct vring_virtqueue *vq = to_vvq(_vq); - if (!vq->use_dma_api) + if (!vq->use_map_api) return; - dma_unmap_single_attrs(vring_dma_dev(vq), addr, size, dir, attrs); + virtqueue_unmap_page_attrs(_vq, addr, size, dir, attrs); } -EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs); +EXPORT_SYMBOL_GPL(virtqueue_unmap_single_attrs); /** - * virtqueue_dma_mapping_error - check dma address + * virtqueue_mapping_error - check dma address * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * * Returns 0 means dma valid. Other means invalid dma address. */ -int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr) +int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr) { - struct vring_virtqueue *vq = to_vvq(_vq); - - if (!vq->use_dma_api) - return 0; + const struct vring_virtqueue *vq = to_vvq(_vq); - return dma_mapping_error(vring_dma_dev(vq), addr); + return vring_mapping_error(vq, addr); } -EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error); +EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error); /** - * virtqueue_dma_need_sync - check a dma address needs sync + * virtqueue_map_need_sync - check a dma address needs sync * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * - * Check if the dma address mapped by the virtqueue_dma_map_* APIs needs to be + * Check if the dma address mapped by the virtqueue_map_* APIs needs to be * synchronized * * return bool */ -bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr) +bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr) { - struct vring_virtqueue *vq = to_vvq(_vq); + const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; - if (!vq->use_dma_api) + if (!vq->use_map_api) return false; - return dma_need_sync(vring_dma_dev(vq), addr); + if (vdev->map) + return vdev->map->need_sync(vq->map, addr); + else + return dma_need_sync(vring_dma_dev(vq), addr); } -EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync); +EXPORT_SYMBOL_GPL(virtqueue_map_need_sync); /** - * virtqueue_dma_sync_single_range_for_cpu - dma sync for cpu + * virtqueue_map_sync_single_range_for_cpu - map sync for cpu * @_vq: the struct virtqueue we're talking about. 
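The renamed single-buffer helpers above keep the pre-mapping flow of their virtqueue_dma_* predecessors: map, check the result, and later unmap with the same size and direction. A sketch under those assumptions:

#include <linux/virtio.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>

static int example_premap(struct virtqueue *vq, void *buf, size_t len,
			  dma_addr_t *addr)
{
	*addr = virtqueue_map_single_attrs(vq, buf, len, DMA_TO_DEVICE, 0);
	if (virtqueue_map_mapping_error(vq, *addr))
		return -ENOMEM;

	/* ... hand the pre-mapped address to the queue ... */
	return 0;
}

static void example_unmap(struct virtqueue *vq, dma_addr_t addr, size_t len)
{
	virtqueue_unmap_single_attrs(vq, addr, len, DMA_TO_DEVICE, 0);
}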
* @addr: DMA address * @offset: DMA address offset * @size: buf size for sync * @dir: DMA direction * - * Before calling this function, use virtqueue_dma_need_sync() to confirm that + * Before calling this function, use virtqueue_map_need_sync() to confirm that * the DMA address really needs to be synchronized * */ -void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, +void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) { - struct vring_virtqueue *vq = to_vvq(_vq); - struct device *dev = vring_dma_dev(vq); + const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; - if (!vq->use_dma_api) + if (!vq->use_map_api) return; - dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); + if (vdev->map) + vdev->map->sync_single_for_cpu(vq->map, + addr + offset, size, dir); + else + dma_sync_single_range_for_cpu(vring_dma_dev(vq), + addr, offset, size, dir); } -EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu); +EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_cpu); /** - * virtqueue_dma_sync_single_range_for_device - dma sync for device + * virtqueue_map_sync_single_range_for_device - map sync for device * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * @offset: DMA address offset * @size: buf size for sync * @dir: DMA direction * - * Before calling this function, use virtqueue_dma_need_sync() to confirm that + * Before calling this function, use virtqueue_map_need_sync() to confirm that * the DMA address really needs to be synchronized */ -void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, +void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) { - struct vring_virtqueue *vq = to_vvq(_vq); - struct device *dev = vring_dma_dev(vq); + const struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; - if (!vq->use_dma_api) + if (!vq->use_map_api) return; - dma_sync_single_range_for_device(dev, addr, offset, size, dir); + if (vdev->map) + vdev->map->sync_single_for_device(vq->map, + addr + offset, + size, dir); + else + dma_sync_single_range_for_device(vring_dma_dev(vq), addr, + offset, size, dir); } -EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device); +EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_device); MODULE_DESCRIPTION("Virtio ring implementation"); MODULE_LICENSE("GPL"); diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 657b07a60788..f9a29045eca0 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -133,12 +133,12 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, const char *name, bool ctx) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - struct device *dma_dev; const struct vdpa_config_ops *ops = vdpa->config; bool (*notify)(struct virtqueue *vq) = virtio_vdpa_notify; struct vdpa_callback cb; struct virtqueue *vq; u64 desc_addr, driver_addr, device_addr; + union virtio_map map = {0}; /* Assume split virtqueue, switch to packed if necessary */ struct vdpa_vq_state state = {0}; u32 align, max_num, min_num = 1; @@ -176,23 +176,27 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, if (ops->get_vq_num_min) min_num = ops->get_vq_num_min(vdpa); - may_reduce_num = (max_num == min_num) ? 
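As the kernel-doc above says, the sync helpers are only meaningful when virtqueue_map_need_sync() reports that the address needs synchronization. A small sketch of the pairing for a device-to-CPU buffer:

#include <linux/virtio.h>
#include <linux/dma-mapping.h>

static void example_sync_read(struct virtqueue *vq, dma_addr_t addr,
			      unsigned long offset, size_t len)
{
	if (!virtqueue_map_need_sync(vq, addr))
		return;

	virtqueue_map_sync_single_range_for_cpu(vq, addr, offset, len,
						DMA_FROM_DEVICE);
	/* ... CPU reads the buffer here ... */
	virtqueue_map_sync_single_range_for_device(vq, addr, offset, len,
						   DMA_FROM_DEVICE);
}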
false : true; + may_reduce_num = (max_num != min_num); /* Create the vring */ align = ops->get_vq_align(vdpa); - if (ops->get_vq_dma_dev) - dma_dev = ops->get_vq_dma_dev(vdpa, index); + if (ops->get_vq_map) + map = ops->get_vq_map(vdpa, index); else - dma_dev = vdpa_get_dma_dev(vdpa); - vq = vring_create_virtqueue_dma(index, max_num, align, vdev, + map = vdpa_get_map(vdpa); + + vq = vring_create_virtqueue_map(index, max_num, align, vdev, true, may_reduce_num, ctx, - notify, callback, name, dma_dev); + notify, callback, name, map); if (!vq) { err = -ENOMEM; goto error_new_virtqueue; } + if (index == 0) + vdev->vmap = map; + vq->num_max = max_num; /* Setup virtqueue callback */ @@ -462,9 +466,11 @@ static int virtio_vdpa_probe(struct vdpa_device *vdpa) if (!vd_dev) return -ENOMEM; - vd_dev->vdev.dev.parent = vdpa_get_dma_dev(vdpa); + vd_dev->vdev.dev.parent = vdpa->map ? &vdpa->dev : + vdpa_get_map(vdpa).dma_dev; vd_dev->vdev.dev.release = virtio_vdpa_release_dev; vd_dev->vdev.config = &virtio_vdpa_config_ops; + vd_dev->vdev.map = vdpa->map; vd_dev->vdpa = vdpa; vd_dev->vdev.id.device = ops->get_device_id(vdpa); |
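virtio_vdpa above now asks the parent for a per-virtqueue union virtio_map via ops->get_vq_map() and falls back to vdpa_get_map(). A hypothetical parent driver implementing the callback could look like the sketch below; the callback prototype is inferred from its call site and from the get_vq_dma_dev() it replaces, and the structure and field names are illustrative only.

#include <linux/vdpa.h>

struct example_parent {
	struct vdpa_device vdpa;
	struct device *vq_dma_dev[16];	/* one DMA device per virtqueue */
};

static union virtio_map example_get_vq_map(struct vdpa_device *vdpa, u16 idx)
{
	struct example_parent *p =
		container_of(vdpa, struct example_parent, vdpa);
	union virtio_map map = { .dma_dev = p->vq_dma_dev[idx] };

	return map;
}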