From 1da33858af6250184d2ef907494d698af03283de Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 31 Jul 2025 21:38:18 +0100
Subject: regmap: irq: Free the regmap-irq mutex

We do not currently free the mutex allocated by regmap-irq, do so.

Tested-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20250731-regmap-irq-nesting-v1-1-98b4d1bf20f0@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-irq.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index d1585f073776..4aac12d38215 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -816,7 +816,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
 						     d->mask_buf[i],
 						     chip->irq_drv_data);
 			if (ret)
-				goto err_alloc;
+				goto err_mutex;
 		}
 
 		if (chip->mask_base && !chip->handle_mask_sync) {
@@ -827,7 +827,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
 			if (ret) {
 				dev_err(map->dev, "Failed to set masks in 0x%x: %d\n",
 					reg, ret);
-				goto err_alloc;
+				goto err_mutex;
 			}
 		}
 
@@ -838,7 +838,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
 			if (ret) {
 				dev_err(map->dev, "Failed to set masks in 0x%x: %d\n",
 					reg, ret);
-				goto err_alloc;
+				goto err_mutex;
 			}
 		}
 
@@ -855,7 +855,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
 			if (ret != 0) {
 				dev_err(map->dev, "Failed to read IRQ status: %d\n",
 					ret);
-				goto err_alloc;
+				goto err_mutex;
 			}
 		}
 
@@ -879,7 +879,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
 			if (ret != 0) {
 				dev_err(map->dev, "Failed to ack 0x%x: %d\n",
 					reg, ret);
-				goto err_alloc;
+				goto err_mutex;
 			}
 		}
 	}
@@ -901,7 +901,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
 			if (ret != 0) {
 				dev_err(map->dev, "Failed to set masks in 0x%x: %d\n",
 					reg, ret);
-				goto err_alloc;
+				goto err_mutex;
 			}
 		}
 	}
@@ -910,7 +910,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
 	if (chip->status_is_level) {
 		ret = read_irq_data(d);
 		if (ret < 0)
-			goto err_alloc;
+			goto err_mutex;
 
 		memcpy(d->prev_status_buf, d->status_buf,
 		       array_size(d->chip->num_regs, sizeof(d->prev_status_buf[0])));
@@ -918,7 +918,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
 
 	ret = regmap_irq_create_domain(fwnode, irq_base, chip, d);
 	if (ret)
-		goto err_alloc;
+		goto err_mutex;
 
 	ret = request_threaded_irq(irq, NULL, regmap_irq_thread,
 				   irq_flags | IRQF_ONESHOT,
@@ -935,6 +935,8 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
 
 err_domain:
 	/* Should really dispose of the domain but... */
+err_mutex:
+	mutex_destroy(&d->lock);
 err_alloc:
 	kfree(d->type_buf);
 	kfree(d->type_buf_def);
@@ -1027,6 +1029,7 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d)
 			kfree(d->config_buf[i]);
 		kfree(d->config_buf);
 	}
+	mutex_destroy(&d->lock);
 	kfree(d);
 }
 EXPORT_SYMBOL_GPL(regmap_del_irq_chip);
-- 
cgit v1.2.3


From 76b6e14aa7b081337d118a82397d919b5e072bb4 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 31 Jul 2025 21:38:19 +0100
Subject: regmap: irq: Avoid lockdep warnings with nested regmap-irq chips

While handling interrupts through regmap-irq we use a mutex to protect the
updates we are caching while genirq runs in atomic context. Russell King
reported that while running on the nVidia Jetson Xavier NX this generates
lockdep warnings since that platform has a regmap-irq for the max77686 RTC
which is a child of a max77620 which also uses regmap-irq.

[   46.723127] rtcwake/3984 is trying to acquire lock:
[   46.723235] ffff0000813b2c68 (&d->lock){+.+.}-{4:4}, at: regmap_irq_lock+0x18/0x24
[   46.723452]
               but task is already holding lock:
[   46.723556] ffff00008504dc68 (&d->lock){+.+.}-{4:4}, at: regmap_irq_lock+0x18/0x24

This happens because by default lockdep uses a single lockdep class for all
mutexes initialised from a single mutex_init() call and is unable to tell
that two distinct mutex are being taken and verify that the ordering of
operations is safe. This should be a very rare situation since normally
anything using regmap-irq will be a leaf interrupt controller due to being
on a slow bus like I2C.

We can avoid these warnings by providing the lockdep key for the regmap-irq
explicitly, allocating one for each chip so that lockdep can distinguish
between them.

Thanks to Russell for the report and analysis.

Reported-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Tested-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20250731-regmap-irq-nesting-v1-2-98b4d1bf20f0@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-irq.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'drivers/base')

diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index 4aac12d38215..6112d942499b 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -21,6 +21,7 @@
 
 struct regmap_irq_chip_data {
 	struct mutex lock;
+	struct lock_class_key lock_key;
 	struct irq_chip irq_chip;
 
 	struct regmap *map;
@@ -801,7 +802,13 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
 			goto err_alloc;
 	}
 
-	mutex_init(&d->lock);
+	/*
+	 * If one regmap-irq is the parent of another then we'll try
+	 * to lock the child with the parent locked, use an explicit
+	 * lock_key so lockdep can figure out what's going on.
+	 */
+	lockdep_register_key(&d->lock_key);
+	mutex_init_with_key(&d->lock, &d->lock_key);
 
 	for (i = 0; i < chip->num_irqs; i++)
 		d->mask_buf_def[chip->irqs[i].reg_offset / map->reg_stride]
@@ -937,6 +944,7 @@ err_domain:
 	/* Should really dispose of the domain but... */
 err_mutex:
 	mutex_destroy(&d->lock);
+	lockdep_unregister_key(&d->lock_key);
 err_alloc:
 	kfree(d->type_buf);
 	kfree(d->type_buf_def);
@@ -1030,6 +1038,7 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d)
 		kfree(d->config_buf);
 	}
 	mutex_destroy(&d->lock);
+	lockdep_unregister_key(&d->lock_key);
 	kfree(d);
 }
 EXPORT_SYMBOL_GPL(regmap_del_irq_chip);
-- 
cgit v1.2.3


From 886f42ce96e7ce80545704e7168a9c6b60cd6c03 Mon Sep 17 00:00:00 2001
From: Luis Henriques <luis@igalia.com>
Date: Mon, 28 Jul 2025 16:08:29 +0100
Subject: regmap: mmio: Add missing MODULE_DESCRIPTION()

There were already several commits to add module descriptions to regmap
modules.  But this one was still missing:

WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/base/regmap/regmap-mmio.o

Signed-off-by: Luis Henriques <luis@igalia.com>
Link: https://patch.msgid.link/20250728150829.11890-1-luis@igalia.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-mmio.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/base')

diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c
index 99d7fd85ca7d..29e5f3175301 100644
--- a/drivers/base/regmap/regmap-mmio.c
+++ b/drivers/base/regmap/regmap-mmio.c
@@ -609,4 +609,5 @@ void regmap_mmio_detach_clk(struct regmap *map)
 }
 EXPORT_SYMBOL_GPL(regmap_mmio_detach_clk);
 
+MODULE_DESCRIPTION("regmap MMIO Module");
 MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 5c36b86d2bf68fbcad16169983ef7ee8c537db59 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 13 Aug 2025 15:07:18 +0200
Subject: regmap: Remove superfluous check for !config in __regmap_init()

The first thing __regmap_init() do is check if config is non-NULL,
so there is no need to check for this again later.

Fixes: d77e745613680c54 ("regmap: Add bulk read/write callbacks into regmap_config")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://patch.msgid.link/a154d9db0f290dda96b48bd817eb743773e846e1.1755090330.git.geert+renesas@glider.be
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/base')

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 1f3f782a04ba..6883e1a43fe5 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -827,7 +827,7 @@ struct regmap *__regmap_init(struct device *dev,
 		map->read_flag_mask = bus->read_flag_mask;
 	}
 
-	if (config && config->read && config->write) {
+	if (config->read && config->write) {
 		map->reg_read  = _regmap_bus_read;
 		if (config->reg_update_bits)
 			map->reg_update_bits = config->reg_update_bits;
-- 
cgit v1.2.3


From 556c1ad666ad90c50ec8fccb930dd5046cfbecfb Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Thu, 14 Aug 2025 10:20:42 -0700
Subject: x86/vmscape: Enable the mitigation

Enable the previously added mitigation for VMscape. Add the cmdline
vmscape={off|ibpb|force} and sysfs reporting.

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
---
 Documentation/ABI/testing/sysfs-devices-system-cpu |  1 +
 Documentation/admin-guide/kernel-parameters.txt    | 11 +++
 arch/x86/Kconfig                                   |  9 +++
 arch/x86/kernel/cpu/bugs.c                         | 90 ++++++++++++++++++++++
 drivers/base/cpu.c                                 |  3 +
 include/linux/cpu.h                                |  1 +
 6 files changed, 115 insertions(+)

(limited to 'drivers/base')

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index ab8cd337f43a..8aed6d94c4cd 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -586,6 +586,7 @@ What:		/sys/devices/system/cpu/vulnerabilities
 		/sys/devices/system/cpu/vulnerabilities/srbds
 		/sys/devices/system/cpu/vulnerabilities/tsa
 		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+		/sys/devices/system/cpu/vulnerabilities/vmscape
 Date:		January 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	Information about CPU vulnerabilities
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 747a55abf494..5a7a83c411e9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3829,6 +3829,7 @@
 					       srbds=off [X86,INTEL]
 					       ssbd=force-off [ARM64]
 					       tsx_async_abort=off [X86]
+					       vmscape=off [X86]
 
 				Exceptions:
 					       This does not have any effect on
@@ -8041,6 +8042,16 @@
 	vmpoff=		[KNL,S390] Perform z/VM CP command after power off.
 			Format: <command>
 
+	vmscape=	[X86] Controls mitigation for VMscape attacks.
+			VMscape attacks can leak information from a userspace
+			hypervisor to a guest via speculative side-channels.
+
+			off		- disable the mitigation
+			ibpb		- use Indirect Branch Prediction Barrier
+					  (IBPB) mitigation (default)
+			force		- force vulnerability detection even on
+					  unaffected processors
+
 	vsyscall=	[X86-64,EARLY]
 			Controls the behavior of vsyscalls (i.e. calls to
 			fixed addresses of 0xffffffffff600x00 from legacy
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 58d890fe2100..52c8910ba2ef 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2701,6 +2701,15 @@ config MITIGATION_TSA
 	  security vulnerability on AMD CPUs which can lead to forwarding of
 	  invalid info to subsequent instructions and thus can affect their
 	  timing and thereby cause a leakage.
+
+config MITIGATION_VMSCAPE
+	bool "Mitigate VMSCAPE"
+	depends on KVM
+	default y
+	help
+	  Enable mitigation for VMSCAPE attacks. VMSCAPE is a hardware security
+	  vulnerability on Intel and AMD CPUs that may allow a guest to do
+	  Spectre v2 style attacks on userspace hypervisor.
 endif
 
 config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 410f8df8b77a..c81024dfc4c8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -96,6 +96,9 @@ static void __init its_update_mitigation(void);
 static void __init its_apply_mitigation(void);
 static void __init tsa_select_mitigation(void);
 static void __init tsa_apply_mitigation(void);
+static void __init vmscape_select_mitigation(void);
+static void __init vmscape_update_mitigation(void);
+static void __init vmscape_apply_mitigation(void);
 
 /* The base value of the SPEC_CTRL MSR without task-specific bits set */
 u64 x86_spec_ctrl_base;
@@ -270,6 +273,7 @@ void __init cpu_select_mitigations(void)
 	its_select_mitigation();
 	bhi_select_mitigation();
 	tsa_select_mitigation();
+	vmscape_select_mitigation();
 
 	/*
 	 * After mitigations are selected, some may need to update their
@@ -301,6 +305,7 @@ void __init cpu_select_mitigations(void)
 	bhi_update_mitigation();
 	/* srso_update_mitigation() depends on retbleed_update_mitigation(). */
 	srso_update_mitigation();
+	vmscape_update_mitigation();
 
 	spectre_v1_apply_mitigation();
 	spectre_v2_apply_mitigation();
@@ -318,6 +323,7 @@ void __init cpu_select_mitigations(void)
 	its_apply_mitigation();
 	bhi_apply_mitigation();
 	tsa_apply_mitigation();
+	vmscape_apply_mitigation();
 }
 
 /*
@@ -3322,6 +3328,77 @@ static void __init srso_apply_mitigation(void)
 	}
 }
 
+#undef pr_fmt
+#define pr_fmt(fmt)	"VMSCAPE: " fmt
+
+enum vmscape_mitigations {
+	VMSCAPE_MITIGATION_NONE,
+	VMSCAPE_MITIGATION_AUTO,
+	VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER,
+	VMSCAPE_MITIGATION_IBPB_ON_VMEXIT,
+};
+
+static const char * const vmscape_strings[] = {
+	[VMSCAPE_MITIGATION_NONE]		= "Vulnerable",
+	/* [VMSCAPE_MITIGATION_AUTO] */
+	[VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER]	= "Mitigation: IBPB before exit to userspace",
+	[VMSCAPE_MITIGATION_IBPB_ON_VMEXIT]	= "Mitigation: IBPB on VMEXIT",
+};
+
+static enum vmscape_mitigations vmscape_mitigation __ro_after_init =
+	IS_ENABLED(CONFIG_MITIGATION_VMSCAPE) ? VMSCAPE_MITIGATION_AUTO : VMSCAPE_MITIGATION_NONE;
+
+static int __init vmscape_parse_cmdline(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	if (!strcmp(str, "off")) {
+		vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+	} else if (!strcmp(str, "ibpb")) {
+		vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
+	} else if (!strcmp(str, "force")) {
+		setup_force_cpu_bug(X86_BUG_VMSCAPE);
+		vmscape_mitigation = VMSCAPE_MITIGATION_AUTO;
+	} else {
+		pr_err("Ignoring unknown vmscape=%s option.\n", str);
+	}
+
+	return 0;
+}
+early_param("vmscape", vmscape_parse_cmdline);
+
+static void __init vmscape_select_mitigation(void)
+{
+	if (cpu_mitigations_off() ||
+	    !boot_cpu_has_bug(X86_BUG_VMSCAPE) ||
+	    !boot_cpu_has(X86_FEATURE_IBPB)) {
+		vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+		return;
+	}
+
+	if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO)
+		vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
+}
+
+static void __init vmscape_update_mitigation(void)
+{
+	if (!boot_cpu_has_bug(X86_BUG_VMSCAPE))
+		return;
+
+	if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB ||
+	    srso_mitigation == SRSO_MITIGATION_IBPB_ON_VMEXIT)
+		vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_ON_VMEXIT;
+
+	pr_info("%s\n", vmscape_strings[vmscape_mitigation]);
+}
+
+static void __init vmscape_apply_mitigation(void)
+{
+	if (vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER)
+		setup_force_cpu_cap(X86_FEATURE_IBPB_EXIT_TO_USER);
+}
+
 #undef pr_fmt
 #define pr_fmt(fmt) fmt
 
@@ -3570,6 +3647,11 @@ static ssize_t tsa_show_state(char *buf)
 	return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]);
 }
 
+static ssize_t vmscape_show_state(char *buf)
+{
+	return sysfs_emit(buf, "%s\n", vmscape_strings[vmscape_mitigation]);
+}
+
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
 			       char *buf, unsigned int bug)
 {
@@ -3636,6 +3718,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 	case X86_BUG_TSA:
 		return tsa_show_state(buf);
 
+	case X86_BUG_VMSCAPE:
+		return vmscape_show_state(buf);
+
 	default:
 		break;
 	}
@@ -3727,6 +3812,11 @@ ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *bu
 {
 	return cpu_show_common(dev, attr, buf, X86_BUG_TSA);
 }
+
+ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_VMSCAPE);
+}
 #endif
 
 void __warn_thunk(void)
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index efc575a00edd..008da0354fba 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -603,6 +603,7 @@ CPU_SHOW_VULN_FALLBACK(ghostwrite);
 CPU_SHOW_VULN_FALLBACK(old_microcode);
 CPU_SHOW_VULN_FALLBACK(indirect_target_selection);
 CPU_SHOW_VULN_FALLBACK(tsa);
+CPU_SHOW_VULN_FALLBACK(vmscape);
 
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
@@ -622,6 +623,7 @@ static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL);
 static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL);
 static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL);
 static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL);
+static DEVICE_ATTR(vmscape, 0444, cpu_show_vmscape, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_meltdown.attr,
@@ -642,6 +644,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_old_microcode.attr,
 	&dev_attr_indirect_target_selection.attr,
 	&dev_attr_tsa.attr,
+	&dev_attr_vmscape.attr,
 	NULL
 };
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index b91b993f58ee..487b3bf2e1ea 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -83,6 +83,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev,
 extern ssize_t cpu_show_indirect_target_selection(struct device *dev,
 						  struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
-- 
cgit v1.2.3


From 292cb391479d50f4379a0abab34324de92c82a92 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 13 Aug 2025 23:03:52 -0700
Subject: software node: Constify node_group in registration functions

The software_node_register_node_group() and
software_node_unregister_node_group() functions take in essence an
array of pointers to software_node structs. Since the functions do not
modify the array declare the argument as constant, so that static
arrays can be declared as const and annotated as __initconst.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Link: https://lore.kernel.org/r/2zny5grbgtwbplynxffxg6dkgjgqf45aigwmgxio5stesdr3wi@gf2zamk5amic
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/swnode.c    | 5 ++---
 include/linux/property.h | 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c
index deda7f35a059..be1e9e61a7bf 100644
--- a/drivers/base/swnode.c
+++ b/drivers/base/swnode.c
@@ -844,7 +844,7 @@ swnode_register(const struct software_node *node, struct swnode *parent,
  * of this function or by ordering the array such that parent comes before
  * child.
  */
-int software_node_register_node_group(const struct software_node **node_group)
+int software_node_register_node_group(const struct software_node * const *node_group)
 {
 	unsigned int i;
 	int ret;
@@ -877,8 +877,7 @@ EXPORT_SYMBOL_GPL(software_node_register_node_group);
  * remove the nodes individually, in the correct order (child before
  * parent).
  */
-void software_node_unregister_node_group(
-		const struct software_node **node_group)
+void software_node_unregister_node_group(const struct software_node * const *node_group)
 {
 	unsigned int i = 0;
 
diff --git a/include/linux/property.h b/include/linux/property.h
index 82f0cb3abd1e..d1e80b3c9918 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -574,8 +574,8 @@ const struct software_node *
 software_node_find_by_name(const struct software_node *parent,
 			   const char *name);
 
-int software_node_register_node_group(const struct software_node **node_group);
-void software_node_unregister_node_group(const struct software_node **node_group);
+int software_node_register_node_group(const struct software_node * const *node_group);
+void software_node_unregister_node_group(const struct software_node * const *node_group);
 
 int software_node_register(const struct software_node *node);
 void software_node_unregister(const struct software_node *node);
-- 
cgit v1.2.3


From e246518aa24f1460902725e97d0abf574aec6ade Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 26 Aug 2025 13:43:47 +0200
Subject: PM: sleep: annotate RCU list iterations

These iterations require the read lock, otherwise RCU
lockdep will splat:

=============================
WARNING: suspicious RCU usage
6.17.0-rc3-00014-g31419c045d64 #6 Tainted: G           O
-----------------------------
drivers/base/power/main.c:1333 RCU-list traversed in non-reader section!!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
5 locks held by rtcwake/547:
 #0: 00000000643ab418 (sb_writers#6){.+.+}-{0:0}, at: file_start_write+0x2b/0x3a
 #1: 0000000067a0ca88 (&of->mutex#2){+.+.}-{4:4}, at: kernfs_fop_write_iter+0x181/0x24b
 #2: 00000000631eac40 (kn->active#3){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x191/0x24b
 #3: 00000000609a1308 (system_transition_mutex){+.+.}-{4:4}, at: pm_suspend+0xaf/0x30b
 #4: 0000000060c0fdb0 (device_links_srcu){.+.+}-{0:0}, at: device_links_read_lock+0x75/0x98

stack backtrace:
CPU: 0 UID: 0 PID: 547 Comm: rtcwake Tainted: G           O        6.17.0-rc3-00014-g31419c045d64 #6 VOLUNTARY
Tainted: [O]=OOT_MODULE
Stack:
 223721b3a80 6089eac6 00000001 00000001
 ffffff00 6089eac6 00000535 6086e528
 721b3ac0 6003c294 00000000 60031fc0
Call Trace:
 [<600407ed>] show_stack+0x10e/0x127
 [<6003c294>] dump_stack_lvl+0x77/0xc6
 [<6003c2fd>] dump_stack+0x1a/0x20
 [<600bc2f8>] lockdep_rcu_suspicious+0x116/0x13e
 [<603d8ea1>] dpm_async_suspend_superior+0x117/0x17e
 [<603d980f>] device_suspend+0x528/0x541
 [<603da24b>] dpm_suspend+0x1a2/0x267
 [<603da837>] dpm_suspend_start+0x5d/0x72
 [<600ca0c9>] suspend_devices_and_enter+0xab/0x736
 [...]

Add the fourth argument to the iteration to annotate
this and avoid the splat.

Fixes: 06799631d522 ("PM: sleep: Make async suspend handle suppliers like parents")
Fixes: ed18738fff02 ("PM: sleep: Make async resume handle consumers like children")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://patch.msgid.link/20250826134348.aba79f6e6299.I9ecf55da46ccf33778f2c018a82e1819d815b348@changeid
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index dbf5456cd891..2ea6e05e6ec9 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -675,7 +675,7 @@ static void dpm_async_resume_subordinate(struct device *dev, async_func_t func)
 	idx = device_links_read_lock();
 
 	/* Start processing the device's "async" consumers. */
-	list_for_each_entry_rcu(link, &dev->links.consumers, s_node)
+	list_for_each_entry_rcu_locked(link, &dev->links.consumers, s_node)
 		if (READ_ONCE(link->status) != DL_STATE_DORMANT)
 			dpm_async_with_cleanup(link->consumer, func);
 
@@ -1330,7 +1330,7 @@ static void dpm_async_suspend_superior(struct device *dev, async_func_t func)
 	idx = device_links_read_lock();
 
 	/* Start processing the device's "async" suppliers. */
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+	list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node)
 		if (READ_ONCE(link->status) != DL_STATE_DORMANT)
 			dpm_async_with_cleanup(link->supplier, func);
 
-- 
cgit v1.2.3


From f4672dc6e9c07643c8c755856ba8e9eb9ca95d0c Mon Sep 17 00:00:00 2001
From: Qianfeng Rong <rongqianfeng@vivo.com>
Date: Thu, 28 Aug 2025 23:07:01 +0800
Subject: regmap: use int type to store negative error codes

Change the 'ret' variable from unsigned int to int to store negative error
codes or zero returned by regmap_field_read() and regmap_read(), and change
'-1' to 'negative errno' in the comments.

Storing the negative error codes in unsigned type, doesn't cause an issue
at runtime but it's ugly as pants. Additionally, assigning negative error
codes to unsigned type may trigger a GCC warning when the -Wsign-conversion
flag is enabled.

No effect on runtime.

Signed-off-by: Qianfeng Rong <rongqianfeng@vivo.com>
Message-ID: <20250828150702.193288-1-rongqianfeng@vivo.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 6883e1a43fe5..ce9be3989a21 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -2258,12 +2258,14 @@ EXPORT_SYMBOL_GPL(regmap_field_update_bits_base);
  * @field: Register field to operate on
  * @bits: Bits to test
  *
- * Returns -1 if the underlying regmap_field_read() fails, 0 if at least one of the
- * tested bits is not set and 1 if all tested bits are set.
+ * Returns negative errno if the underlying regmap_field_read() fails,
+ * 0 if at least one of the tested bits is not set and 1 if all tested
+ * bits are set.
  */
 int regmap_field_test_bits(struct regmap_field *field, unsigned int bits)
 {
-	unsigned int val, ret;
+	unsigned int val;
+	int ret;
 
 	ret = regmap_field_read(field, &val);
 	if (ret)
@@ -3309,7 +3311,8 @@ EXPORT_SYMBOL_GPL(regmap_update_bits_base);
  */
 int regmap_test_bits(struct regmap *map, unsigned int reg, unsigned int bits)
 {
-	unsigned int val, ret;
+	unsigned int val;
+	int ret;
 
 	ret = regmap_read(map, reg, &val);
 	if (ret)
-- 
cgit v1.2.3


From b57fc652ca24ada3b0c888327f9944ed21559286 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 15:29:05 -0700
Subject: drivers/base/node: Add a helper function node_update_perf_attrs()

Add helper function node_update_perf_attrs() to allow update of node access
coordinates computed by an external agent such as CXL. The helper allows
updating of coordinates after the attribute being created by HMAT.

Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20250829222907.1290912-3-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/base/node.c  | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/node.h |  8 ++++++++
 2 files changed, 46 insertions(+)

(limited to 'drivers/base')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 3399594136b2..db18a1c32637 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -248,6 +248,44 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
 }
 EXPORT_SYMBOL_GPL(node_set_perf_attrs);
 
+/**
+ * node_update_perf_attrs - Update the performance values for given access class
+ * @nid: Node identifier to be updated
+ * @coord: Heterogeneous memory performance coordinates
+ * @access: The access class for the given attributes
+ */
+void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord,
+			    enum access_coordinate_class access)
+{
+	struct node_access_nodes *access_node;
+	struct node *node;
+	int i;
+
+	if (WARN_ON_ONCE(!node_online(nid)))
+		return;
+
+	node = node_devices[nid];
+	list_for_each_entry(access_node, &node->access_list, list_node) {
+		if (access_node->access != access)
+			continue;
+
+		access_node->coord = *coord;
+		for (i = 0; access_attrs[i]; i++) {
+			sysfs_notify(&access_node->dev.kobj,
+				     NULL, access_attrs[i]->name);
+		}
+		break;
+	}
+
+	/* When setting CPU access coordinates, update mempolicy */
+	if (access != ACCESS_COORDINATE_CPU)
+		return;
+
+	if (mempolicy_set_node_perf(nid, coord))
+		pr_info("failed to set mempolicy attrs for node %d\n", nid);
+}
+EXPORT_SYMBOL_GPL(node_update_perf_attrs);
+
 /**
  * struct node_cache_info - Internal tracking for memory node caches
  * @dev:	Device represeting the cache level
diff --git a/include/linux/node.h b/include/linux/node.h
index 2c7529335b21..866e3323f1fd 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -85,6 +85,8 @@ struct node_cache_attrs {
 void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs);
 void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
 			 enum access_coordinate_class access);
+void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord,
+			    enum access_coordinate_class access);
 #else
 static inline void node_add_cache(unsigned int nid,
 				  struct node_cache_attrs *cache_attrs)
@@ -96,6 +98,12 @@ static inline void node_set_perf_attrs(unsigned int nid,
 				       enum access_coordinate_class access)
 {
 }
+
+static inline void node_update_perf_attrs(unsigned int nid,
+					  struct access_coordinate *coord,
+					  enum access_coordinate_class access)
+{
+}
 #endif
 
 struct node {
-- 
cgit v1.2.3


From be82483d1b60baf6747884bd74cb7de484deaf76 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Sep 2025 15:55:45 +0200
Subject: PM: sleep: core: Clear power.must_resume in noirq suspend error path

If system suspend is aborted in the "noirq" phase (for instance, due to
an error returned by one of the device callbacks), power.is_noirq_suspended
will not be set for some devices and device_resume_noirq() will return
early for them.  Consequently, noirq resume callbacks will not run for
them at all because the noirq suspend callbacks have not run for them
yet.

If any of them has power.must_resume set and late suspend has been
skipped for it (due to power.smart_suspend), early resume should be
skipped for it either, or its state may become inconsistent (for
instance, if the early resume assumes that it will always follow
noirq resume).

Make that happen by clearing power.must_resume in device_resume_noirq()
for devices with power.is_noirq_suspended clear that have been left in
suspend by device_suspend_late(), which will subsequently cause
device_resume_early() to leave the device in suspend and avoid
changing its state.

Fixes: 0d4b54c6fee8 ("PM / core: Add LEAVE_SUSPENDED driver flag")
Link: https://lore.kernel.org/linux-pm/5d692b81-6f58-4e86-9cb0-ede69a09d799@rowland.harvard.edu/
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://patch.msgid.link/3381776.aeNJFYEL58@rafael.j.wysocki
---
 drivers/base/power/main.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'drivers/base')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 2ea6e05e6ec9..c883b01ffbdd 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -724,8 +724,20 @@ static void device_resume_noirq(struct device *dev, pm_message_t state, bool asy
 	if (dev->power.syscore || dev->power.direct_complete)
 		goto Out;
 
-	if (!dev->power.is_noirq_suspended)
+	if (!dev->power.is_noirq_suspended) {
+		/*
+		 * This means that system suspend has been aborted in the noirq
+		 * phase before invoking the noirq suspend callback for the
+		 * device, so if device_suspend_late() has left it in suspend,
+		 * device_resume_early() should leave it in suspend either in
+		 * case the early resume of it depends on the noirq resume that
+		 * has not run.
+		 */
+		if (dev_pm_skip_suspend(dev))
+			dev->power.must_resume = false;
+
 		goto Out;
+	}
 
 	if (!dpm_wait_for_superior(dev, async))
 		goto Out;
-- 
cgit v1.2.3


From 1ad926459970444af1140f9b393f416536e1a828 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 28 Aug 2025 12:56:41 +0200
Subject: driver core: faux: Set power.no_pm for faux devices

Since faux devices are not supposed to be involved in any kind of
power management, set the no_pm flag for all of them.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: stable <stable@kernel.org>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/6206518.lOV4Wx5bFT@rafael.j.wysocki
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/faux.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/base')

diff --git a/drivers/base/faux.c b/drivers/base/faux.c
index f5fbda0a9a44..21dd02124231 100644
--- a/drivers/base/faux.c
+++ b/drivers/base/faux.c
@@ -155,6 +155,7 @@ struct faux_device *faux_device_create_with_groups(const char *name,
 		dev->parent = &faux_bus_root;
 	dev->bus = &faux_bus_type;
 	dev_set_name(dev, "%s", name);
+	device_set_pm_not_required(dev);
 
 	ret = device_add(dev);
 	if (ret) {
-- 
cgit v1.2.3


From fdd9ae23bb989fa9ed1beebba7d3e0c82c7c81ae Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Sep 2025 15:43:50 +0200
Subject: PM: core: Annotate loops walking device links as _srcu

Since SRCU is used for the protection of device link lists, the loops
over device link lists in multiple places in drivers/base/power/main.c
and in pm_runtime_get_suppliers() should be annotated as _srcu rather
than as _rcu which is the case currently.

Change the annotations accordingly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/2393512.ElGaqSPkdT@rafael.j.wysocki
---
 drivers/base/power/main.c    | 18 +++++++++---------
 drivers/base/power/runtime.c |  4 ++--
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 2ea6e05e6ec9..2fa53896db82 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -40,8 +40,8 @@
 
 typedef int (*pm_callback_t)(struct device *);
 
-#define list_for_each_entry_rcu_locked(pos, head, member) \
-	list_for_each_entry_rcu(pos, head, member, \
+#define list_for_each_entry_srcu_locked(pos, head, member) \
+	list_for_each_entry_srcu(pos, head, member, \
 			device_links_read_lock_held())
 
 /*
@@ -281,7 +281,7 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async)
 	 * callbacks freeing the link objects for the links in the list we're
 	 * walking.
 	 */
-	list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node)
+	list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node)
 		if (READ_ONCE(link->status) != DL_STATE_DORMANT)
 			dpm_wait(link->supplier, async);
 
@@ -338,7 +338,7 @@ static void dpm_wait_for_consumers(struct device *dev, bool async)
 	 * continue instead of trying to continue in parallel with its
 	 * unregistration).
 	 */
-	list_for_each_entry_rcu_locked(link, &dev->links.consumers, s_node)
+	list_for_each_entry_srcu_locked(link, &dev->links.consumers, s_node)
 		if (READ_ONCE(link->status) != DL_STATE_DORMANT)
 			dpm_wait(link->consumer, async);
 
@@ -675,7 +675,7 @@ static void dpm_async_resume_subordinate(struct device *dev, async_func_t func)
 	idx = device_links_read_lock();
 
 	/* Start processing the device's "async" consumers. */
-	list_for_each_entry_rcu_locked(link, &dev->links.consumers, s_node)
+	list_for_each_entry_srcu_locked(link, &dev->links.consumers, s_node)
 		if (READ_ONCE(link->status) != DL_STATE_DORMANT)
 			dpm_async_with_cleanup(link->consumer, func);
 
@@ -1330,7 +1330,7 @@ static void dpm_async_suspend_superior(struct device *dev, async_func_t func)
 	idx = device_links_read_lock();
 
 	/* Start processing the device's "async" suppliers. */
-	list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node)
+	list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node)
 		if (READ_ONCE(link->status) != DL_STATE_DORMANT)
 			dpm_async_with_cleanup(link->supplier, func);
 
@@ -1384,7 +1384,7 @@ static void dpm_superior_set_must_resume(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node)
+	list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node)
 		link->supplier->power.must_resume = true;
 
 	device_links_read_unlock(idx);
@@ -1813,7 +1813,7 @@ static void dpm_clear_superiors_direct_complete(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) {
+	list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) {
 		spin_lock_irq(&link->supplier->power.lock);
 		link->supplier->power.direct_complete = false;
 		spin_unlock_irq(&link->supplier->power.lock);
@@ -2065,7 +2065,7 @@ static bool device_prepare_smart_suspend(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) {
+	list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) {
 		if (!device_link_test(link, DL_FLAG_PM_RUNTIME))
 			continue;
 
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 3e84dc4122de..8c23a11e8017 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1903,8 +1903,8 @@ void pm_runtime_get_suppliers(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
-				device_links_read_lock_held())
+	list_for_each_entry_srcu(link, &dev->links.suppliers, c_node,
+				 device_links_read_lock_held())
 		if (device_link_test(link, DL_FLAG_PM_RUNTIME)) {
 			link->supplier_preactivated = true;
 			pm_runtime_get_sync(link->supplier);
-- 
cgit v1.2.3


From 3ce3f569991347d2085925041f4932232da43bcf Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Sep 2025 15:45:14 +0200
Subject: PM: core: Add two macros for walking device links

Add separate macros for walking links to suppliers and consumers of a
device to help device links users to avoid exposing the internals of
struct dev_links_info in their code and possible coding mistakes related
to that.

Accordingly, use the new macros to replace open-coded device links list
walks in the core power management code.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patch.msgid.link/1944671.tdWV9SEqCh@rafael.j.wysocki
---
 drivers/base/base.h          |  8 ++++++++
 drivers/base/power/main.c    | 18 +++++++-----------
 drivers/base/power/runtime.c |  3 +--
 3 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/base.h b/drivers/base/base.h
index 123031a757d9..700aecd22fd3 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -251,6 +251,14 @@ void device_links_unbind_consumers(struct device *dev);
 void fw_devlink_drivers_done(void);
 void fw_devlink_probing_done(void);
 
+#define dev_for_each_link_to_supplier(__link, __dev)	\
+	list_for_each_entry_srcu(__link, &(__dev)->links.suppliers, c_node, \
+				 device_links_read_lock_held())
+
+#define dev_for_each_link_to_consumer(__link, __dev)	\
+	list_for_each_entry_srcu(__link, &(__dev)->links.consumers, s_node, \
+				 device_links_read_lock_held())
+
 /* device pm support */
 void device_pm_move_to_tail(struct device *dev);
 
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 2fa53896db82..e3a7f25bf8d3 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -40,10 +40,6 @@
 
 typedef int (*pm_callback_t)(struct device *);
 
-#define list_for_each_entry_srcu_locked(pos, head, member) \
-	list_for_each_entry_srcu(pos, head, member, \
-			device_links_read_lock_held())
-
 /*
  * The entries in the dpm_list list are in a depth first order, simply
  * because children are guaranteed to be discovered after parents, and
@@ -281,7 +277,7 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async)
 	 * callbacks freeing the link objects for the links in the list we're
 	 * walking.
 	 */
-	list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node)
+	dev_for_each_link_to_supplier(link, dev)
 		if (READ_ONCE(link->status) != DL_STATE_DORMANT)
 			dpm_wait(link->supplier, async);
 
@@ -338,7 +334,7 @@ static void dpm_wait_for_consumers(struct device *dev, bool async)
 	 * continue instead of trying to continue in parallel with its
 	 * unregistration).
 	 */
-	list_for_each_entry_srcu_locked(link, &dev->links.consumers, s_node)
+	dev_for_each_link_to_consumer(link, dev)
 		if (READ_ONCE(link->status) != DL_STATE_DORMANT)
 			dpm_wait(link->consumer, async);
 
@@ -675,7 +671,7 @@ static void dpm_async_resume_subordinate(struct device *dev, async_func_t func)
 	idx = device_links_read_lock();
 
 	/* Start processing the device's "async" consumers. */
-	list_for_each_entry_srcu_locked(link, &dev->links.consumers, s_node)
+	dev_for_each_link_to_consumer(link, dev)
 		if (READ_ONCE(link->status) != DL_STATE_DORMANT)
 			dpm_async_with_cleanup(link->consumer, func);
 
@@ -1330,7 +1326,7 @@ static void dpm_async_suspend_superior(struct device *dev, async_func_t func)
 	idx = device_links_read_lock();
 
 	/* Start processing the device's "async" suppliers. */
-	list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node)
+	dev_for_each_link_to_supplier(link, dev)
 		if (READ_ONCE(link->status) != DL_STATE_DORMANT)
 			dpm_async_with_cleanup(link->supplier, func);
 
@@ -1384,7 +1380,7 @@ static void dpm_superior_set_must_resume(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node)
+	dev_for_each_link_to_supplier(link, dev)
 		link->supplier->power.must_resume = true;
 
 	device_links_read_unlock(idx);
@@ -1813,7 +1809,7 @@ static void dpm_clear_superiors_direct_complete(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) {
+	dev_for_each_link_to_supplier(link, dev) {
 		spin_lock_irq(&link->supplier->power.lock);
 		link->supplier->power.direct_complete = false;
 		spin_unlock_irq(&link->supplier->power.lock);
@@ -2065,7 +2061,7 @@ static bool device_prepare_smart_suspend(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_srcu_locked(link, &dev->links.suppliers, c_node) {
+	dev_for_each_link_to_supplier(link, dev) {
 		if (!device_link_test(link, DL_FLAG_PM_RUNTIME))
 			continue;
 
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 8c23a11e8017..7420b9851fe0 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1903,8 +1903,7 @@ void pm_runtime_get_suppliers(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_srcu(link, &dev->links.suppliers, c_node,
-				 device_links_read_lock_held())
+	dev_for_each_link_to_supplier(link, dev)
 		if (device_link_test(link, DL_FLAG_PM_RUNTIME)) {
 			link->supplier_preactivated = true;
 			pm_runtime_get_sync(link->supplier);
-- 
cgit v1.2.3


From 2b2d4c744e1ab8b8d41c5c2ccf889f5441e50105 Mon Sep 17 00:00:00 2001
From: Xichao Zhao <zhao.xichao@vivo.com>
Date: Wed, 27 Aug 2025 19:40:21 +0800
Subject: drivers: base: fix "publically"->"publicly"

Trivial fix to spelling mistake in comment text.

Signed-off-by: Xichao Zhao <zhao.xichao@vivo.com>
Link: https://lore.kernel.org/r/20250827114021.476668-1-zhao.xichao@vivo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/base')

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index efc575a00edd..ccaebb8ac656 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -325,7 +325,7 @@ static void cpu_device_release(struct device *dev)
 	 * This is an empty function to prevent the driver core from spitting a
 	 * warning at us.  Yes, I know this is directly opposite of what the
 	 * documentation for the driver core and kobjects say, and the author
-	 * of this code has already been publically ridiculed for doing
+	 * of this code has already been publicly ridiculed for doing
 	 * something as foolish as this.  However, at this point in time, it is
 	 * the only way to handle the issue of statically allocated cpu
 	 * devices.  The different architectures will have their cpu device
-- 
cgit v1.2.3


From a86537ad21c7ba587241b0dcdd186f894a69fac7 Mon Sep 17 00:00:00 2001
From: Danilo Krummrich <dakr@kernel.org>
Date: Fri, 29 Aug 2025 22:58:57 +0200
Subject: driver core: get_dev_from_fwnode(): document potential race

Commit 9a4681a485ee ("driver core: Export get_dev_from_fwnode()") made
get_dev_from_fwnode() publicly available, but didn't document the
guarantees a caller must uphold:

get_dev_from_fwnode() obtains a reference count from the device pointer
stored in a struct fwnode_handle. While having its own reference count,
struct fwnode_handle does not keep a reference count of the device it
has a pointer to.

Consequently, a caller must guarantee that it is impossible that the
last device reference is dropped and the device is released concurrently
while calling get_dev_from_fwnode(), otherwise this is a potential UAF
and hence a bug.

Thus, document this potential race condition for get_dev_from_fwnode().

Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Saravana Kannan <saravanak@google.com>
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
Link: https://lore.kernel.org/r/20250829205911.33142-1-dakr@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'drivers/base')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index d22d6b23e758..cd806be435cd 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -5278,6 +5278,25 @@ void device_set_node(struct device *dev, struct fwnode_handle *fwnode)
 }
 EXPORT_SYMBOL_GPL(device_set_node);
 
+/**
+ * get_dev_from_fwnode - Obtain a reference count of the struct device the
+ * struct fwnode_handle is associated with.
+ * @fwnode: The pointer to the struct fwnode_handle to obtain the struct device
+ * reference count of.
+ *
+ * This function obtains a reference count of the device the device pointer
+ * embedded in the struct fwnode_handle points to.
+ *
+ * Note that the struct device pointer embedded in struct fwnode_handle does
+ * *not* have a reference count of the struct device itself.
+ *
+ * Hence, it is a UAF (and thus a bug) to call this function if the caller can't
+ * guarantee that the last reference count of the corresponding struct device is
+ * not dropped concurrently.
+ *
+ * This is possible since struct fwnode_handle has its own reference count and
+ * hence can out-live the struct device it is associated with.
+ */
 struct device *get_dev_from_fwnode(struct fwnode_handle *fwnode)
 {
 	return get_device((fwnode)->dev);
-- 
cgit v1.2.3


From 716cec5fc92f6d4090a54ddb01042bce02b3c771 Mon Sep 17 00:00:00 2001
From: Gil Fine <gil.fine@linux.intel.com>
Date: Sun, 31 Aug 2025 22:49:30 +0300
Subject: driver core: Fix order of the kernel-doc parameters

Fix the order of the kernel-doc parameters in device_find_child() and
device_for_each_child*() functions to match the actual functions signature.

No functional changes.

Signed-off-by: Gil Fine <gil.fine@linux.intel.com>
Link: https://lore.kernel.org/r/20250831194930.2063390-1-gil.fine@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index cd806be435cd..fa8093119602 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -3994,8 +3994,8 @@ const char *device_get_devnode(const struct device *dev,
 /**
  * device_for_each_child - device child iterator.
  * @parent: parent struct device.
- * @fn: function to be called for each device.
  * @data: data for the callback.
+ * @fn: function to be called for each device.
  *
  * Iterate over @parent's child devices, and call @fn for each,
  * passing it @data.
@@ -4024,8 +4024,8 @@ EXPORT_SYMBOL_GPL(device_for_each_child);
 /**
  * device_for_each_child_reverse - device child iterator in reversed order.
  * @parent: parent struct device.
- * @fn: function to be called for each device.
  * @data: data for the callback.
+ * @fn: function to be called for each device.
  *
  * Iterate over @parent's child devices, and call @fn for each,
  * passing it @data.
@@ -4055,8 +4055,8 @@ EXPORT_SYMBOL_GPL(device_for_each_child_reverse);
  * device_for_each_child_reverse_from - device child iterator in reversed order.
  * @parent: parent struct device.
  * @from: optional starting point in child list
- * @fn: function to be called for each device.
  * @data: data for the callback.
+ * @fn: function to be called for each device.
  *
  * Iterate over @parent's child devices, starting at @from, and call @fn
  * for each, passing it @data. This helper is identical to
@@ -4089,8 +4089,8 @@ EXPORT_SYMBOL_GPL(device_for_each_child_reverse_from);
 /**
  * device_find_child - device iterator for locating a particular device.
  * @parent: parent struct device
- * @match: Callback function to check device
  * @data: Data to pass to match function
+ * @match: Callback function to check device
  *
  * This is similar to the device_for_each_child() function above, but it
  * returns a reference to a device that is 'found' for later use, as
-- 
cgit v1.2.3


From eca710386972f2a72708b0b2a615eb150d218e18 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Date: Wed, 27 Aug 2025 13:05:41 +0300
Subject: driver core: auxiliary bus: Drop dev_pm_domain_detach() call

Starting with commit f99508074e78 ("PM: domains: Detach on
device_unbind_cleanup()"), there is no longer a need to call
dev_pm_domain_detach() in the bus remove function. The
device_unbind_cleanup() function now handles this to avoid
invoking devres cleanup handlers while the PM domain is
powered off, which could otherwise lead to failures as
described in the above-mentioned commit.

Drop the explicit dev_pm_domain_detach() call and rely instead
on the flags passed to dev_pm_domain_attach() to power off the
domain.

Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20250827100541.926350-1-claudiu.beznea.uj@bp.renesas.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/auxiliary.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c
index 12ffdd843756..f65ba30d0617 100644
--- a/drivers/base/auxiliary.c
+++ b/drivers/base/auxiliary.c
@@ -217,17 +217,14 @@ static int auxiliary_bus_probe(struct device *dev)
 	struct auxiliary_device *auxdev = to_auxiliary_dev(dev);
 	int ret;
 
-	ret = dev_pm_domain_attach(dev, PD_FLAG_ATTACH_POWER_ON);
+	ret = dev_pm_domain_attach(dev, PD_FLAG_ATTACH_POWER_ON |
+					PD_FLAG_DETACH_POWER_OFF);
 	if (ret) {
 		dev_warn(dev, "Failed to attach to PM Domain : %d\n", ret);
 		return ret;
 	}
 
-	ret = auxdrv->probe(auxdev, auxiliary_match_id(auxdrv->id_table, auxdev));
-	if (ret)
-		dev_pm_domain_detach(dev, true);
-
-	return ret;
+	return auxdrv->probe(auxdev, auxiliary_match_id(auxdrv->id_table, auxdev));
 }
 
 static void auxiliary_bus_remove(struct device *dev)
@@ -237,7 +234,6 @@ static void auxiliary_bus_remove(struct device *dev)
 
 	if (auxdrv->remove)
 		auxdrv->remove(auxdev);
-	dev_pm_domain_detach(dev, true);
 }
 
 static void auxiliary_bus_shutdown(struct device *dev)
-- 
cgit v1.2.3


From 4c48aed6dfcd32ea23e52adc1072405a62facf46 Mon Sep 17 00:00:00 2001
From: Zijun Hu <zijun.hu@oss.qualcomm.com>
Date: Wed, 3 Sep 2025 19:37:22 +0800
Subject: driver core: auxiliary bus: Optimize logic of auxiliary_match_id()

auxiliary_match_id() repeatedly calculates variable @match_size in the
for loop, however, the variable is fixed actually, so it is enough to
only calculate the variable once.

Besides, the function should return directly if name of the @auxdev
does not include '.', but it still iterates over the ID table.

Additionally, statement 'dev_name(&auxdev->dev)' is fixed, but may be
evaluated more than 3 times.

Optimize logic of the function by:
- Move the logic calculating the variable out of the for loop
- Return NULL directly if @p == NULL
- Give the statement an dedicated local variable @auxdev_name

Signed-off-by: Zijun Hu <zijun.hu@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20250903-fix_auxbus-v2-1-3eae8374fd65@oss.qualcomm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/auxiliary.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c
index f65ba30d0617..04bdbff4dbe5 100644
--- a/drivers/base/auxiliary.c
+++ b/drivers/base/auxiliary.c
@@ -171,17 +171,18 @@
 static const struct auxiliary_device_id *auxiliary_match_id(const struct auxiliary_device_id *id,
 							    const struct auxiliary_device *auxdev)
 {
-	for (; id->name[0]; id++) {
-		const char *p = strrchr(dev_name(&auxdev->dev), '.');
-		int match_size;
+	const char *auxdev_name = dev_name(&auxdev->dev);
+	const char *p = strrchr(auxdev_name, '.');
+	int match_size;
 
-		if (!p)
-			continue;
-		match_size = p - dev_name(&auxdev->dev);
+	if (!p)
+		return NULL;
+	match_size = p - auxdev_name;
 
+	for (; id->name[0]; id++) {
 		/* use dev_name(&auxdev->dev) prefix before last '.' char to match to */
 		if (strlen(id->name) == match_size &&
-		    !strncmp(dev_name(&auxdev->dev), id->name, match_size))
+		    !strncmp(auxdev_name, id->name, match_size))
 			return id;
 	}
 	return NULL;
-- 
cgit v1.2.3


From d364d2ad07873dc4991b2a631a8536597272418b Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Tue, 2 Sep 2025 13:59:11 +0200
Subject: devres: provide devm_kmemdup_const()

Provide a function similar to devm_strdup_const() but for copying blocks
of memory that are likely to be placed in .rodata.

Reviewed-by: Andy Shevchenko <andy@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/base/devres.c         | 21 +++++++++++++++++++++
 include/linux/device/devres.h |  2 ++
 2 files changed, 23 insertions(+)

(limited to 'drivers/base')

diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index ff55e1bcfa30..c948c88d3956 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -1117,6 +1117,27 @@ void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp)
 }
 EXPORT_SYMBOL_GPL(devm_kmemdup);
 
+/**
+ * devm_kmemdup_const - conditionally duplicate and manage a region of memory
+ *
+ * @dev: Device this memory belongs to
+ * @src: memory region to duplicate
+ * @len: memory region length,
+ * @gfp: GFP mask to use
+ *
+ * Return: source address if it is in .rodata or the return value of kmemdup()
+ * to which the function falls back otherwise.
+ */
+const void *
+devm_kmemdup_const(struct device *dev, const void *src, size_t len, gfp_t gfp)
+{
+	if (is_kernel_rodata((unsigned long)src))
+		return src;
+
+	return devm_kmemdup(dev, src, len, gfp);
+}
+EXPORT_SYMBOL_GPL(devm_kmemdup_const);
+
 struct pages_devres {
 	unsigned long addr;
 	unsigned int order;
diff --git a/include/linux/device/devres.h b/include/linux/device/devres.h
index ae696d10faff..8c5f57e0d613 100644
--- a/include/linux/device/devres.h
+++ b/include/linux/device/devres.h
@@ -80,6 +80,8 @@ void devm_kfree(struct device *dev, const void *p);
 
 void * __realloc_size(3)
 devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp);
+const void *
+devm_kmemdup_const(struct device *dev, const void *src, size_t len, gfp_t gfp);
 static inline void *devm_kmemdup_array(struct device *dev, const void *src,
 				       size_t n, size_t size, gfp_t flags)
 {
-- 
cgit v1.2.3


From 786eb990cfb78aab94eb74fb32a030e14723a620 Mon Sep 17 00:00:00 2001
From: Donet Tom <donettom@linux.ibm.com>
Date: Fri, 22 Aug 2025 14:18:45 +0530
Subject: drivers/base/node: handle error properly in register_one_node()

If register_node() returns an error, it is not handled correctly.
The function will proceed further and try to register CPUs under the
node, which is not correct.

So, in this patch, if register_node() returns an error, we return
immediately from the function.

Link: https://lkml.kernel.org/r/20250822084845.19219-1-donettom@linux.ibm.com
Fixes: 76b67ed9dce6 ("[PATCH] node hotplug: register cpu: remove node struct")
Signed-off-by: Donet Tom <donettom@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Cc: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/base/node.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers/base')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 3399594136b2..45d512939c40 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -885,6 +885,11 @@ int register_one_node(int nid)
 	node_devices[nid] = node;
 
 	error = register_node(node_devices[nid], nid);
+	if (error) {
+		node_devices[nid] = NULL;
+		kfree(node);
+		return error;
+	}
 
 	/* link cpu under this node */
 	for_each_present_cpu(cpu) {
-- 
cgit v1.2.3


From 3d18f80ce181ba27f37d0ec1c550b22acb01dd49 Mon Sep 17 00:00:00 2001
From: NeilBrown <neil@brown.name>
Date: Mon, 22 Sep 2025 14:29:52 +1000
Subject: VFS: rename kern_path_locked() and related functions.

kern_path_locked() is now only used to prepare for removing an object
from the filesystem (and that is the only credible reason for wanting a
positive locked dentry).  Thus it corresponds to kern_path_create() and
so should have a corresponding name.

Unfortunately the name "kern_path_create" is somewhat misleading as it
doesn't actually create anything.  The recently added
simple_start_creating() provides a better pattern I believe.  The
"start" can be matched with "end" to bracket the creating or removing.

So this patch changes names:

 kern_path_locked -> start_removing_path
 kern_path_create -> start_creating_path
 user_path_create -> start_creating_user_path
 user_path_locked_at -> start_removing_user_path_at
 done_path_create -> end_creating_path

and also introduces end_removing_path() which is identical to
end_creating_path().

__start_removing_path (which was __kern_path_locked) is enhanced to
call mnt_want_write() for consistency with the start_creating_path().

Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/porting.rst        | 12 ++++++
 arch/powerpc/platforms/cell/spufs/syscalls.c |  4 +-
 drivers/base/devtmpfs.c                      | 22 +++++------
 fs/bcachefs/fs-ioctl.c                       | 10 ++---
 fs/init.c                                    | 17 ++++----
 fs/namei.c                                   | 59 +++++++++++++++++-----------
 fs/ocfs2/refcounttree.c                      |  4 +-
 fs/smb/server/vfs.c                          |  8 ++--
 include/linux/namei.h                        | 14 ++++---
 kernel/bpf/inode.c                           |  4 +-
 net/unix/af_unix.c                           |  6 +--
 11 files changed, 93 insertions(+), 67 deletions(-)

(limited to 'drivers/base')

diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 85f590254f07..e0494860be6b 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1285,3 +1285,15 @@ rather than a VMA, as the VMA at this stage is not yet valid.
 The vm_area_desc provides the minimum required information for a filesystem
 to initialise state upon memory mapping of a file-backed region, and output
 parameters for the file system to set this state.
+
+---
+
+**mandatory**
+
+Several functions are renamed:
+
+-  kern_path_locked -> start_removing_path
+-  kern_path_create -> start_creating_path
+-  user_path_create -> start_creating_user_path
+-  user_path_locked_at -> start_removing_user_path_at
+-  done_path_create -> end_creating_path
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 157e046e6e93..ea4ba1b6ce6a 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -67,11 +67,11 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
 	struct dentry *dentry;
 	int ret;
 
-	dentry = user_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY);
+	dentry = start_creating_user_path(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY);
 	ret = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
 		ret = spufs_create(&path, dentry, flags, mode, neighbor);
-		done_path_create(&path, dentry);
+		end_creating_path(&path, dentry);
 	}
 
 	return ret;
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 31bfb3194b4c..9d4e46ad8352 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -176,7 +176,7 @@ static int dev_mkdir(const char *name, umode_t mode)
 	struct dentry *dentry;
 	struct path path;
 
-	dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
+	dentry = start_creating_path(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
@@ -184,7 +184,7 @@ static int dev_mkdir(const char *name, umode_t mode)
 	if (!IS_ERR(dentry))
 		/* mark as kernel-created inode */
 		d_inode(dentry)->i_private = &thread;
-	done_path_create(&path, dentry);
+	end_creating_path(&path, dentry);
 	return PTR_ERR_OR_ZERO(dentry);
 }
 
@@ -222,10 +222,10 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
 	struct path path;
 	int err;
 
-	dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);
+	dentry = start_creating_path(AT_FDCWD, nodename, &path, 0);
 	if (dentry == ERR_PTR(-ENOENT)) {
 		create_path(nodename);
-		dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);
+		dentry = start_creating_path(AT_FDCWD, nodename, &path, 0);
 	}
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
@@ -246,7 +246,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
 		/* mark as kernel-created inode */
 		d_inode(dentry)->i_private = &thread;
 	}
-	done_path_create(&path, dentry);
+	end_creating_path(&path, dentry);
 	return err;
 }
 
@@ -256,7 +256,7 @@ static int dev_rmdir(const char *name)
 	struct dentry *dentry;
 	int err;
 
-	dentry = kern_path_locked(name, &parent);
+	dentry = start_removing_path(name, &parent);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 	if (d_inode(dentry)->i_private == &thread)
@@ -265,9 +265,7 @@ static int dev_rmdir(const char *name)
 	else
 		err = -EPERM;
 
-	dput(dentry);
-	inode_unlock(d_inode(parent.dentry));
-	path_put(&parent);
+	end_removing_path(&parent, dentry);
 	return err;
 }
 
@@ -325,7 +323,7 @@ static int handle_remove(const char *nodename, struct device *dev)
 	int deleted = 0;
 	int err = 0;
 
-	dentry = kern_path_locked(nodename, &parent);
+	dentry = start_removing_path(nodename, &parent);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
@@ -349,10 +347,8 @@ static int handle_remove(const char *nodename, struct device *dev)
 		if (!err || err == -ENOENT)
 			deleted = 1;
 	}
-	dput(dentry);
-	inode_unlock(d_inode(parent.dentry));
+	end_removing_path(&parent, dentry);
 
-	path_put(&parent);
 	if (deleted && strchr(nodename, '/'))
 		delete_path(nodename);
 	return err;
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 4e72e654da96..43510da5e734 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -255,7 +255,7 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
 		snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode));
 	}
 
-	dst_dentry = user_path_create(arg.dirfd,
+	dst_dentry = start_creating_user_path(arg.dirfd,
 			(const char __user *)(unsigned long)arg.dst_ptr,
 			&dst_path, lookup_flags);
 	error = PTR_ERR_OR_ZERO(dst_dentry);
@@ -314,7 +314,7 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
 	d_instantiate(dst_dentry, &inode->v);
 	fsnotify_mkdir(dir, dst_dentry);
 err3:
-	done_path_create(&dst_path, dst_dentry);
+	end_creating_path(&dst_path, dst_dentry);
 err2:
 	if (arg.src_ptr)
 		path_put(&src_path);
@@ -334,7 +334,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
 	if (arg.flags)
 		return -EINVAL;
 
-	victim = user_path_locked_at(arg.dirfd, name, &path);
+	victim = start_removing_user_path_at(arg.dirfd, name, &path);
 	if (IS_ERR(victim))
 		return PTR_ERR(victim);
 
@@ -351,9 +351,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
 		d_invalidate(victim);
 	}
 err:
-	inode_unlock(dir);
-	dput(victim);
-	path_put(&path);
+	end_removing_path(&path, victim);
 	return ret;
 }
 
diff --git a/fs/init.c b/fs/init.c
index eef5124885e3..07f592ccdba8 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -149,7 +149,7 @@ int __init init_mknod(const char *filename, umode_t mode, unsigned int dev)
 	else if (!(S_ISBLK(mode) || S_ISCHR(mode)))
 		return -EINVAL;
 
-	dentry = kern_path_create(AT_FDCWD, filename, &path, 0);
+	dentry = start_creating_path(AT_FDCWD, filename, &path, 0);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
@@ -158,7 +158,7 @@ int __init init_mknod(const char *filename, umode_t mode, unsigned int dev)
 	if (!error)
 		error = vfs_mknod(mnt_idmap(path.mnt), path.dentry->d_inode,
 				  dentry, mode, new_decode_dev(dev));
-	done_path_create(&path, dentry);
+	end_creating_path(&path, dentry);
 	return error;
 }
 
@@ -173,7 +173,7 @@ int __init init_link(const char *oldname, const char *newname)
 	if (error)
 		return error;
 
-	new_dentry = kern_path_create(AT_FDCWD, newname, &new_path, 0);
+	new_dentry = start_creating_path(AT_FDCWD, newname, &new_path, 0);
 	error = PTR_ERR(new_dentry);
 	if (IS_ERR(new_dentry))
 		goto out;
@@ -191,7 +191,7 @@ int __init init_link(const char *oldname, const char *newname)
 	error = vfs_link(old_path.dentry, idmap, new_path.dentry->d_inode,
 			 new_dentry, NULL);
 out_dput:
-	done_path_create(&new_path, new_dentry);
+	end_creating_path(&new_path, new_dentry);
 out:
 	path_put(&old_path);
 	return error;
@@ -203,14 +203,14 @@ int __init init_symlink(const char *oldname, const char *newname)
 	struct path path;
 	int error;
 
-	dentry = kern_path_create(AT_FDCWD, newname, &path, 0);
+	dentry = start_creating_path(AT_FDCWD, newname, &path, 0);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 	error = security_path_symlink(&path, dentry, oldname);
 	if (!error)
 		error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode,
 				    dentry, oldname);
-	done_path_create(&path, dentry);
+	end_creating_path(&path, dentry);
 	return error;
 }
 
@@ -225,7 +225,8 @@ int __init init_mkdir(const char *pathname, umode_t mode)
 	struct path path;
 	int error;
 
-	dentry = kern_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY);
+	dentry = start_creating_path(AT_FDCWD, pathname, &path,
+				     LOOKUP_DIRECTORY);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 	mode = mode_strip_umask(d_inode(path.dentry), mode);
@@ -236,7 +237,7 @@ int __init init_mkdir(const char *pathname, umode_t mode)
 		if (IS_ERR(dentry))
 			error = PTR_ERR(dentry);
 	}
-	done_path_create(&path, dentry);
+	end_creating_path(&path, dentry);
 	return error;
 }
 
diff --git a/fs/namei.c b/fs/namei.c
index 4017bc8641d3..92973a7a8091 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2758,7 +2758,8 @@ static int filename_parentat(int dfd, struct filename *name,
 }
 
 /* does lookup, returns the object with parent locked */
-static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct path *path)
+static struct dentry *__start_removing_path(int dfd, struct filename *name,
+					   struct path *path)
 {
 	struct path parent_path __free(path_put) = {};
 	struct dentry *d;
@@ -2770,15 +2771,26 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct
 		return ERR_PTR(error);
 	if (unlikely(type != LAST_NORM))
 		return ERR_PTR(-EINVAL);
+	/* don't fail immediately if it's r/o, at least try to report other errors */
+	error = mnt_want_write(parent_path.mnt);
 	inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT);
 	d = lookup_one_qstr_excl(&last, parent_path.dentry, 0);
-	if (IS_ERR(d)) {
-		inode_unlock(parent_path.dentry->d_inode);
-		return d;
-	}
+	if (IS_ERR(d))
+		goto unlock;
+	if (error)
+		goto fail;
 	path->dentry = no_free_ptr(parent_path.dentry);
 	path->mnt = no_free_ptr(parent_path.mnt);
 	return d;
+
+fail:
+	dput(d);
+	d = ERR_PTR(error);
+unlock:
+	inode_unlock(parent_path.dentry->d_inode);
+	if (!error)
+		mnt_drop_write(parent_path.mnt);
+	return d;
 }
 
 /**
@@ -2816,24 +2828,26 @@ struct dentry *kern_path_parent(const char *name, struct path *path)
 	return d;
 }
 
-struct dentry *kern_path_locked(const char *name, struct path *path)
+struct dentry *start_removing_path(const char *name, struct path *path)
 {
 	struct filename *filename = getname_kernel(name);
-	struct dentry *res = __kern_path_locked(AT_FDCWD, filename, path);
+	struct dentry *res = __start_removing_path(AT_FDCWD, filename, path);
 
 	putname(filename);
 	return res;
 }
 
-struct dentry *user_path_locked_at(int dfd, const char __user *name, struct path *path)
+struct dentry *start_removing_user_path_at(int dfd,
+					   const char __user *name,
+					   struct path *path)
 {
 	struct filename *filename = getname(name);
-	struct dentry *res = __kern_path_locked(dfd, filename, path);
+	struct dentry *res = __start_removing_path(dfd, filename, path);
 
 	putname(filename);
 	return res;
 }
-EXPORT_SYMBOL(user_path_locked_at);
+EXPORT_SYMBOL(start_removing_user_path_at);
 
 int kern_path(const char *name, unsigned int flags, struct path *path)
 {
@@ -4223,8 +4237,8 @@ out:
 	return dentry;
 }
 
-struct dentry *kern_path_create(int dfd, const char *pathname,
-				struct path *path, unsigned int lookup_flags)
+struct dentry *start_creating_path(int dfd, const char *pathname,
+				   struct path *path, unsigned int lookup_flags)
 {
 	struct filename *filename = getname_kernel(pathname);
 	struct dentry *res = filename_create(dfd, filename, path, lookup_flags);
@@ -4232,9 +4246,9 @@ struct dentry *kern_path_create(int dfd, const char *pathname,
 	putname(filename);
 	return res;
 }
-EXPORT_SYMBOL(kern_path_create);
+EXPORT_SYMBOL(start_creating_path);
 
-void done_path_create(struct path *path, struct dentry *dentry)
+void end_creating_path(struct path *path, struct dentry *dentry)
 {
 	if (!IS_ERR(dentry))
 		dput(dentry);
@@ -4242,10 +4256,11 @@ void done_path_create(struct path *path, struct dentry *dentry)
 	mnt_drop_write(path->mnt);
 	path_put(path);
 }
-EXPORT_SYMBOL(done_path_create);
+EXPORT_SYMBOL(end_creating_path);
 
-inline struct dentry *user_path_create(int dfd, const char __user *pathname,
-				struct path *path, unsigned int lookup_flags)
+inline struct dentry *start_creating_user_path(
+	int dfd, const char __user *pathname,
+	struct path *path, unsigned int lookup_flags)
 {
 	struct filename *filename = getname(pathname);
 	struct dentry *res = filename_create(dfd, filename, path, lookup_flags);
@@ -4253,7 +4268,7 @@ inline struct dentry *user_path_create(int dfd, const char __user *pathname,
 	putname(filename);
 	return res;
 }
-EXPORT_SYMBOL(user_path_create);
+EXPORT_SYMBOL(start_creating_user_path);
 
 /**
  * vfs_mknod - create device node or file
@@ -4361,7 +4376,7 @@ retry:
 			break;
 	}
 out2:
-	done_path_create(&path, dentry);
+	end_creating_path(&path, dentry);
 	if (retry_estale(error, lookup_flags)) {
 		lookup_flags |= LOOKUP_REVAL;
 		goto retry;
@@ -4465,7 +4480,7 @@ retry:
 		if (IS_ERR(dentry))
 			error = PTR_ERR(dentry);
 	}
-	done_path_create(&path, dentry);
+	end_creating_path(&path, dentry);
 	if (retry_estale(error, lookup_flags)) {
 		lookup_flags |= LOOKUP_REVAL;
 		goto retry;
@@ -4819,7 +4834,7 @@ retry:
 	if (!error)
 		error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode,
 				    dentry, from->name);
-	done_path_create(&path, dentry);
+	end_creating_path(&path, dentry);
 	if (retry_estale(error, lookup_flags)) {
 		lookup_flags |= LOOKUP_REVAL;
 		goto retry;
@@ -4988,7 +5003,7 @@ retry:
 	error = vfs_link(old_path.dentry, idmap, new_path.dentry->d_inode,
 			 new_dentry, &delegated_inode);
 out_dput:
-	done_path_create(&new_path, new_dentry);
+	end_creating_path(&new_path, new_dentry);
 	if (delegated_inode) {
 		error = break_deleg_wait(&delegated_inode);
 		if (!error) {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 8f732742b26e..267b50e8e42e 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4418,7 +4418,7 @@ int ocfs2_reflink_ioctl(struct inode *inode,
 		return error;
 	}
 
-	new_dentry = user_path_create(AT_FDCWD, newname, &new_path, 0);
+	new_dentry = start_creating_user_path(AT_FDCWD, newname, &new_path, 0);
 	error = PTR_ERR(new_dentry);
 	if (IS_ERR(new_dentry)) {
 		mlog_errno(error);
@@ -4435,7 +4435,7 @@ int ocfs2_reflink_ioctl(struct inode *inode,
 				  d_inode(new_path.dentry),
 				  new_dentry, preserve);
 out_dput:
-	done_path_create(&new_path, new_dentry);
+	end_creating_path(&new_path, new_dentry);
 out:
 	path_put(&old_path);
 
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 07739055ac9f..1cfa688904b2 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -196,7 +196,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
 		pr_err("File(%s): creation failed (err:%d)\n", name, err);
 	}
 
-	done_path_create(&path, dentry);
+	end_creating_path(&path, dentry);
 	return err;
 }
 
@@ -237,7 +237,7 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
 	if (!err && dentry != d)
 		ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(dentry));
 
-	done_path_create(&path, dentry);
+	end_creating_path(&path, dentry);
 	if (err)
 		pr_err("mkdir(%s): creation failed (err:%d)\n", name, err);
 	return err;
@@ -669,7 +669,7 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname,
 		ksmbd_debug(VFS, "vfs_link failed err %d\n", err);
 
 out3:
-	done_path_create(&newpath, dentry);
+	end_creating_path(&newpath, dentry);
 out2:
 	path_put(&oldpath);
 out1:
@@ -1325,7 +1325,7 @@ struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
 	if (!abs_name)
 		return ERR_PTR(-ENOMEM);
 
-	dent = kern_path_create(AT_FDCWD, abs_name, path, flags);
+	dent = start_creating_path(AT_FDCWD, abs_name, path, flags);
 	kfree(abs_name);
 	return dent;
 }
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 1d5038c21c20..a7800ef04e76 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -59,11 +59,15 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
 extern int kern_path(const char *, unsigned, struct path *);
 struct dentry *kern_path_parent(const char *name, struct path *parent);
 
-extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int);
-extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
-extern void done_path_create(struct path *, struct dentry *);
-extern struct dentry *kern_path_locked(const char *, struct path *);
-extern struct dentry *user_path_locked_at(int , const char __user *, struct path *);
+extern struct dentry *start_creating_path(int, const char *, struct path *, unsigned int);
+extern struct dentry *start_creating_user_path(int, const char __user *, struct path *, unsigned int);
+extern void end_creating_path(struct path *, struct dentry *);
+extern struct dentry *start_removing_path(const char *, struct path *);
+extern struct dentry *start_removing_user_path_at(int , const char __user *, struct path *);
+static inline void end_removing_path(struct path *path , struct dentry *dentry)
+{
+	end_creating_path(path, dentry);
+}
 int vfs_path_parent_lookup(struct filename *filename, unsigned int flags,
 			   struct path *parent, struct qstr *last, int *type,
 			   const struct path *root);
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 5c2e96b19392..fadf3817a9c5 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -442,7 +442,7 @@ static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw,
 	umode_t mode;
 	int ret;
 
-	dentry = user_path_create(path_fd, pathname, &path, 0);
+	dentry = start_creating_user_path(path_fd, pathname, &path, 0);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
@@ -471,7 +471,7 @@ static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw,
 		ret = -EPERM;
 	}
 out:
-	done_path_create(&path, dentry);
+	end_creating_path(&path, dentry);
 	return ret;
 }
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 6d7c110814ff..768098dec231 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1387,7 +1387,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
 	 * Get the parent directory, calculate the hash for last
 	 * component.
 	 */
-	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
+	dentry = start_creating_path(AT_FDCWD, addr->name->sun_path, &parent, 0);
 	if (IS_ERR(dentry)) {
 		err = PTR_ERR(dentry);
 		goto out;
@@ -1417,7 +1417,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
 	unix_table_double_unlock(net, old_hash, new_hash);
 	unix_insert_bsd_socket(sk);
 	mutex_unlock(&u->bindlock);
-	done_path_create(&parent, dentry);
+	end_creating_path(&parent, dentry);
 	return 0;
 
 out_unlock:
@@ -1427,7 +1427,7 @@ out_unlink:
 	/* failed after successful mknod?  unlink what we'd created... */
 	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
 out_path:
-	done_path_create(&parent, dentry);
+	end_creating_path(&parent, dentry);
 out:
 	unix_release_addr(addr);
 	return err == -EEXIST ? -EADDRINUSE : err;
-- 
cgit v1.2.3


From 159c86f306ea20c019b36fc998e2677008183c04 Mon Sep 17 00:00:00 2001
From: Sunil V L <sunilvl@ventanamicro.com>
Date: Mon, 18 Aug 2025 09:39:11 +0530
Subject: ACPI: Add support for nargs_prop in acpi_fwnode_get_reference_args()

Currently, ACPI does not support the use of a nargs_prop (e.g.,
associated with a reference in fwnode_property_get_reference_args().
Instead, ACPI expects the number of arguments (nargs) to be explicitly
passed or known.

This behavior diverges from Open Firmware (OF), which allows the use of
a #*-cells property in the referenced node to determine the number of
arguments. Since fwnode_property_get_reference_args() is a common
interface used across both OF and ACPI firmware paradigms, it is
desirable to have a unified calling convention that works seamlessly for
both.

Add the support for ACPI to parse a nargs_prop from the referenced
fwnode, aligning its behavior with the OF backend. This allows drivers
and subsystems using fwnode_property_get_reference_args() to work in a
firmware-agnostic way without having to hardcode or special-case
argument counts for ACPI.

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Sunil V L <sunilvl@ventanamicro.com>
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Acked-by: Jassi Brar <jassisinghbrar@gmail.com>
Link: https://lore.kernel.org/r/20250818040920.272664-16-apatel@ventanamicro.com
Signed-off-by: Paul Walmsley <pjw@kernel.org>
---
 drivers/acpi/property.c | 29 +++++++++++++++++++++++++----
 drivers/base/property.c |  2 +-
 2 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index d4863746fb11..e92402deee77 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -804,13 +804,35 @@ acpi_fwnode_get_named_child_node(const struct fwnode_handle *fwnode,
 	return NULL;
 }
 
+static unsigned int acpi_fwnode_get_args_count(struct fwnode_handle *fwnode,
+					       const char *nargs_prop)
+{
+	const struct acpi_device_data *data;
+	const union acpi_object *obj;
+	int ret;
+
+	data = acpi_device_data_of_node(fwnode);
+	if (!data)
+		return 0;
+
+	ret = acpi_data_get_property(data, nargs_prop, ACPI_TYPE_INTEGER, &obj);
+	if (ret)
+		return 0;
+
+	return obj->integer.value;
+}
+
 static int acpi_get_ref_args(struct fwnode_reference_args *args,
 			     struct fwnode_handle *ref_fwnode,
+			     const char *nargs_prop,
 			     const union acpi_object **element,
 			     const union acpi_object *end, size_t num_args)
 {
 	u32 nargs = 0, i;
 
+	if (nargs_prop)
+		num_args = acpi_fwnode_get_args_count(ref_fwnode, nargs_prop);
+
 	/*
 	 * Assume the following integer elements are all args. Stop counting on
 	 * the first reference (possibly represented as a string) or end of the
@@ -961,10 +983,10 @@ static int acpi_fwnode_get_reference_args(const struct fwnode_handle *fwnode,
 				return -EINVAL;
 
 			element++;
-
 			ret = acpi_get_ref_args(idx == index ? args : NULL,
 						acpi_fwnode_handle(device),
-						&element, end, args_count);
+						nargs_prop, &element, end,
+						args_count);
 			if (ret < 0)
 				return ret;
 
@@ -979,9 +1001,8 @@ static int acpi_fwnode_get_reference_args(const struct fwnode_handle *fwnode,
 				return -EINVAL;
 
 			element++;
-
 			ret = acpi_get_ref_args(idx == index ? args : NULL,
-						ref_fwnode, &element, end,
+						ref_fwnode, nargs_prop, &element, end,
 						args_count);
 			if (ret < 0)
 				return ret;
diff --git a/drivers/base/property.c b/drivers/base/property.c
index f626d5bbe806..6a63860579dd 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -578,7 +578,7 @@ EXPORT_SYMBOL_GPL(fwnode_property_match_property_string);
  * @prop:	The name of the property
  * @nargs_prop:	The name of the property telling the number of
  *		arguments in the referred node. NULL if @nargs is known,
- *		otherwise @nargs is ignored. Only relevant on OF.
+ *		otherwise @nargs is ignored.
  * @nargs:	Number of arguments. Ignored if @nargs_prop is non-NULL.
  * @index:	Index of the reference, from zero onwards.
  * @args:	Result structure with reference and integer arguments.
-- 
cgit v1.2.3


From 7f7acd193ba8aaa8ed07cfadc335bb17a991fd42 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Thu, 25 Sep 2025 12:42:14 -0700
Subject: PM: runtime: Add basic kunit tests for API contracts

In exploring the various return codes and failure modes of runtime PM
APIs, I found it helpful to verify and codify many of them in unit
tests, especially given that even the kerneldoc can be rather complex to
reason through, and it also has had subtle errors of its own.

Notably, I avoid testing the return codes for pm_runtime_put() and
pm_runtime_put_autosuspend(), since code that checks them is probably
wrong, and we're considering making them return 'void' altogether. I
still test the sync() variants, since those have a bit more meaning to
them.

Signed-off-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/Kconfig              |   6 +
 drivers/base/power/Makefile       |   1 +
 drivers/base/power/runtime-test.c | 253 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 260 insertions(+)
 create mode 100644 drivers/base/power/runtime-test.c

(limited to 'drivers/base')

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 064eb52ff7e2..1786d87b29e2 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -167,6 +167,12 @@ config PM_QOS_KUNIT_TEST
 	depends on KUNIT=y
 	default KUNIT_ALL_TESTS
 
+config PM_RUNTIME_KUNIT_TEST
+	tristate "KUnit Tests for runtime PM" if !KUNIT_ALL_TESTS
+	depends on KUNIT
+	depends on PM
+	default KUNIT_ALL_TESTS
+
 config HMEM_REPORTING
 	bool
 	default n
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index 01f11629d241..2989e42d0161 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -4,5 +4,6 @@ obj-$(CONFIG_PM_SLEEP)	+= main.o wakeup.o wakeup_stats.o
 obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
 obj-$(CONFIG_HAVE_CLK)	+= clock_ops.o
 obj-$(CONFIG_PM_QOS_KUNIT_TEST) += qos-test.o
+obj-$(CONFIG_PM_RUNTIME_KUNIT_TEST) += runtime-test.o
 
 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
diff --git a/drivers/base/power/runtime-test.c b/drivers/base/power/runtime-test.c
new file mode 100644
index 000000000000..2e966fd96664
--- /dev/null
+++ b/drivers/base/power/runtime-test.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2025 Google, Inc.
+ */
+
+#include <linux/cleanup.h>
+#include <linux/pm_runtime.h>
+#include <kunit/device.h>
+#include <kunit/test.h>
+
+#define DEVICE_NAME "pm_runtime_test_device"
+
+static void pm_runtime_depth_test(struct kunit *test)
+{
+	struct device *dev = kunit_device_register(test, DEVICE_NAME);
+
+	KUNIT_ASSERT_PTR_NE(test, NULL, dev);
+
+	pm_runtime_enable(dev);
+
+	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_get_sync(dev));
+	KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev));
+	KUNIT_EXPECT_EQ(test, 1, pm_runtime_get_sync(dev)); /* "already active" */
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync(dev));
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync(dev));
+	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
+}
+
+/* Test pm_runtime_put() and friends when already suspended. */
+static void pm_runtime_already_suspended_test(struct kunit *test)
+{
+	struct device *dev = kunit_device_register(test, DEVICE_NAME);
+
+	KUNIT_ASSERT_PTR_NE(test, NULL, dev);
+
+	pm_runtime_enable(dev);
+	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
+
+	pm_runtime_get_noresume(dev);
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_barrier(dev)); /* no wakeup needed */
+	pm_runtime_put(dev);
+
+	pm_runtime_get_noresume(dev);
+	KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_put_sync(dev));
+
+	KUNIT_EXPECT_EQ(test, 1, pm_runtime_suspend(dev));
+	KUNIT_EXPECT_EQ(test, 1, pm_runtime_autosuspend(dev));
+	KUNIT_EXPECT_EQ(test, 1, pm_request_autosuspend(dev));
+
+	pm_runtime_get_noresume(dev);
+	KUNIT_EXPECT_EQ(test, 1, pm_runtime_put_sync_autosuspend(dev));
+
+	pm_runtime_get_noresume(dev);
+	pm_runtime_put_autosuspend(dev);
+
+	/* Grab 2 refcounts */
+	pm_runtime_get_noresume(dev);
+	pm_runtime_get_noresume(dev);
+	/* The first put() sees usage_count 1 */
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync_autosuspend(dev));
+	/* The second put() sees usage_count 0 but tells us "already suspended". */
+	KUNIT_EXPECT_EQ(test, 1, pm_runtime_put_sync_autosuspend(dev));
+
+	/* Should have remained suspended the whole time. */
+	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
+}
+
+static void pm_runtime_idle_test(struct kunit *test)
+{
+	struct device *dev = kunit_device_register(test, DEVICE_NAME);
+
+	KUNIT_ASSERT_PTR_NE(test, NULL, dev);
+
+	pm_runtime_enable(dev);
+
+	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_get_sync(dev));
+	KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev));
+	KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_idle(dev));
+	KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev));
+	pm_runtime_put_noidle(dev);
+	KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev));
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_idle(dev));
+	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
+	KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_idle(dev));
+	KUNIT_EXPECT_EQ(test, -EAGAIN, pm_request_idle(dev));
+}
+
+static void pm_runtime_disabled_test(struct kunit *test)
+{
+	struct device *dev = kunit_device_register(test, DEVICE_NAME);
+
+	KUNIT_ASSERT_PTR_NE(test, NULL, dev);
+
+	/* Never called pm_runtime_enable() */
+	KUNIT_EXPECT_FALSE(test, pm_runtime_enabled(dev));
+
+	/* "disabled" is treated as "active" */
+	KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev));
+	KUNIT_EXPECT_FALSE(test, pm_runtime_suspended(dev));
+
+	/*
+	 * Note: these "fail", but they still acquire/release refcounts, so
+	 * keep them balanced.
+	 */
+	KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_get(dev));
+	pm_runtime_put(dev);
+
+	KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_get_sync(dev));
+	KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_put_sync(dev));
+
+	KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_get(dev));
+	pm_runtime_put_autosuspend(dev);
+
+	KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_resume_and_get(dev));
+	KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_idle(dev));
+	KUNIT_EXPECT_EQ(test, -EACCES, pm_request_idle(dev));
+	KUNIT_EXPECT_EQ(test, -EACCES, pm_request_resume(dev));
+	KUNIT_EXPECT_EQ(test, -EACCES, pm_request_autosuspend(dev));
+	KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_suspend(dev));
+	KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_resume(dev));
+	KUNIT_EXPECT_EQ(test, -EACCES, pm_runtime_autosuspend(dev));
+
+	/* Still disabled */
+	KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev));
+	KUNIT_EXPECT_FALSE(test, pm_runtime_enabled(dev));
+}
+
+static void pm_runtime_error_test(struct kunit *test)
+{
+	struct device *dev = kunit_device_register(test, DEVICE_NAME);
+
+	KUNIT_ASSERT_PTR_NE(test, NULL, dev);
+
+	pm_runtime_enable(dev);
+	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
+
+	/* Fake a .runtime_resume() error */
+	dev->power.runtime_error = -EIO;
+
+	/*
+	 * Note: these "fail", but they still acquire/release refcounts, so
+	 * keep them balanced.
+	 */
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_get(dev));
+	pm_runtime_put(dev);
+
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_get_sync(dev));
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_put_sync(dev));
+
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_get(dev));
+	pm_runtime_put_autosuspend(dev);
+
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_get(dev));
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_put_sync_autosuspend(dev));
+
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_resume_and_get(dev));
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_idle(dev));
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_request_idle(dev));
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_request_resume(dev));
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_request_autosuspend(dev));
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_suspend(dev));
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_resume(dev));
+	KUNIT_EXPECT_EQ(test, -EINVAL, pm_runtime_autosuspend(dev));
+
+	/* Error is still pending */
+	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
+	KUNIT_EXPECT_EQ(test, -EIO, dev->power.runtime_error);
+	/* Clear error */
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_set_suspended(dev));
+	KUNIT_EXPECT_EQ(test, 0, dev->power.runtime_error);
+	/* Still suspended */
+	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
+
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_get(dev));
+	KUNIT_EXPECT_EQ(test, 1, pm_runtime_barrier(dev)); /* resume was pending */
+	pm_runtime_put(dev);
+	pm_runtime_suspend(dev); /* flush the put(), to suspend */
+	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
+
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_get_sync(dev));
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync(dev));
+
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_get_sync(dev));
+	pm_runtime_put_autosuspend(dev);
+
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_resume_and_get(dev));
+
+	/*
+	 * The following should all return -EAGAIN (usage is non-zero) or 1
+	 * (already resumed).
+	 */
+	KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_idle(dev));
+	KUNIT_EXPECT_EQ(test, -EAGAIN, pm_request_idle(dev));
+	KUNIT_EXPECT_EQ(test, 1, pm_request_resume(dev));
+	KUNIT_EXPECT_EQ(test, -EAGAIN, pm_request_autosuspend(dev));
+	KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_suspend(dev));
+	KUNIT_EXPECT_EQ(test, 1, pm_runtime_resume(dev));
+	KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_autosuspend(dev));
+
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_put_sync(dev));
+
+	/* Suspended again */
+	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
+}
+
+/*
+ * Explore a typical probe() sequence in which a device marks itself powered,
+ * but doesn't hold any runtime PM reference, so it suspends as soon as it goes
+ * idle.
+ */
+static void pm_runtime_probe_active_test(struct kunit *test)
+{
+	struct device *dev = kunit_device_register(test, DEVICE_NAME);
+
+	KUNIT_ASSERT_PTR_NE(test, NULL, dev);
+
+	KUNIT_EXPECT_TRUE(test, pm_runtime_status_suspended(dev));
+
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_set_active(dev));
+	KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev));
+
+	pm_runtime_enable(dev);
+	KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev));
+
+	/* Nothing to flush. We stay active. */
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_barrier(dev));
+	KUNIT_EXPECT_TRUE(test, pm_runtime_active(dev));
+
+	/* Ask for idle? Now we suspend. */
+	KUNIT_EXPECT_EQ(test, 0, pm_runtime_idle(dev));
+	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
+}
+
+static struct kunit_case pm_runtime_test_cases[] = {
+	KUNIT_CASE(pm_runtime_depth_test),
+	KUNIT_CASE(pm_runtime_already_suspended_test),
+	KUNIT_CASE(pm_runtime_idle_test),
+	KUNIT_CASE(pm_runtime_disabled_test),
+	KUNIT_CASE(pm_runtime_error_test),
+	KUNIT_CASE(pm_runtime_probe_active_test),
+	{}
+};
+
+static struct kunit_suite pm_runtime_test_suite = {
+	.name = "pm_runtime_test_cases",
+	.test_cases = pm_runtime_test_cases,
+};
+
+kunit_test_suite(pm_runtime_test_suite);
+MODULE_DESCRIPTION("Runtime power management unit test suite");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From d0b8651a026125d58b50b464aeb78f2c5956179f Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Thu, 25 Sep 2025 12:42:15 -0700
Subject: PM: runtime: Make put{,_sync}() return 1 when already suspended

The pm_runtime.h docs say pm_runtime_put() and pm_runtime_put_sync()
return 1 when already suspended, but this is not true -- they return
-EAGAIN. On the other hand, pm_runtime_put_sync_suspend() and
pm_runtime_put_sync_autosuspend() *do* return 1.

This is an artifact of the fact that the former are built on rpm_idle(),
whereas the latter are built on rpm_suspend().

There are precious few pm_runtime_put()/pm_runtime_put_sync() callers
that check the return code at all, but most of them only log errors, and
usually only for negative error codes. None of them should be treating
this as an error, so:

 * at best, this may fix some case where a driver treats this condition
   as an error, when it shouldn't;

 * at worst, this should make no effect; and

 * somewhere in between, we could potentially clear up non-fatal log
   messages.

Fix the pm_runtime_already_suspended_test() while tweaking the behavior.
The test makes a lot more sense when these all return 1 when the device
is already suspended:

    pm_runtime_put_sync(dev);
    pm_runtime_suspend(dev);
    pm_runtime_autosuspend(dev);
    pm_request_autosuspend(dev);
    pm_runtime_put_sync_autosuspend(dev);

Notably, I've avoided testing the return codes for these, since they
really should be ignored by callers, and we may make them 'void'
altogether:

    pm_runtime_put(dev);
    pm_runtime_put_autosuspend(dev);

Signed-off-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Dhruva Gole <d-gole@ti.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime-test.c | 2 +-
 drivers/base/power/runtime.c      | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'drivers/base')

diff --git a/drivers/base/power/runtime-test.c b/drivers/base/power/runtime-test.c
index 2e966fd96664..eca9885e807d 100644
--- a/drivers/base/power/runtime-test.c
+++ b/drivers/base/power/runtime-test.c
@@ -42,7 +42,7 @@ static void pm_runtime_already_suspended_test(struct kunit *test)
 	pm_runtime_put(dev);
 
 	pm_runtime_get_noresume(dev);
-	KUNIT_EXPECT_EQ(test, -EAGAIN, pm_runtime_put_sync(dev));
+	KUNIT_EXPECT_EQ(test, 1, pm_runtime_put_sync(dev));
 
 	KUNIT_EXPECT_EQ(test, 1, pm_runtime_suspend(dev));
 	KUNIT_EXPECT_EQ(test, 1, pm_runtime_autosuspend(dev));
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 3e84dc4122de..faa68bf9ef3d 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -498,6 +498,9 @@ static int rpm_idle(struct device *dev, int rpmflags)
 	if (retval < 0)
 		;	/* Conditions are wrong. */
 
+	else if ((rpmflags & RPM_GET_PUT) && retval == 1)
+		;	/* put() is allowed in RPM_SUSPENDED */
+
 	/* Idle notifications are allowed only in the RPM_ACTIVE state. */
 	else if (dev->power.runtime_status != RPM_ACTIVE)
 		retval = -EAGAIN;
-- 
cgit v1.2.3


From 632d31067be2f414c57955efcf29c79290cc749b Mon Sep 17 00:00:00 2001
From: Pin-yen Lin <treapking@chromium.org>
Date: Fri, 26 Sep 2025 18:23:18 +0800
Subject: PM: sleep: Do not wait on SYNC_STATE_ONLY device links

Device links with DL_FLAG_SYNC_STATE_ONLY should not affect system
suspend and resume, and functions like device_reorder_to_tail() and
device_link_add() don't try to reorder the consumers with that flag.

However, dpm_wait_for_consumers() and dpm_wait_for_suppliers() don't
check thas flag before triggering dpm_wait(), leading to potential hang
during suspend/resume.

This can be reproduced on MT8186 Corsola Chromebook with devicetree like:

usb-a-connector {
        compatible = "usb-a-connector";
        port {
                usb_a_con: endpoint {
                        remote-endpoint = <&usb_hs>;
                };
        };
};

usb_host {
        compatible = "mediatek,mt8186-xhci", "mediatek,mtk-xhci";
        port {
                usb_hs: endpoint {
                        remote-endpoint = <&usb_a_con>;
                };
        };
};

In this case, the two nodes form a cycle and a SYNC_STATE_ONLY devlink
between usb_host (supplier) and usb-a-connector (consumer) is created.

Address this by exporting device_link_flag_is_sync_state_only() and
making dpm_wait_for_consumers() and dpm_wait_for_suppliers() use it
when deciding if dpm_wait() should be called.

Fixes: 05ef983e0d65a ("driver core: Add device link support for SYNC_STATE_ONLY flag")
Signed-off-by: Pin-yen Lin <treapking@chromium.org>
Link: https://patch.msgid.link/20250926102320.4053167-1-treapking@chromium.org
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/base.h       | 1 +
 drivers/base/core.c       | 2 +-
 drivers/base/power/main.c | 6 ++++--
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/base.h b/drivers/base/base.h
index 700aecd22fd3..86fa7fbb3548 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -248,6 +248,7 @@ void device_links_driver_cleanup(struct device *dev);
 void device_links_no_driver(struct device *dev);
 bool device_links_busy(struct device *dev);
 void device_links_unbind_consumers(struct device *dev);
+bool device_link_flag_is_sync_state_only(u32 flags);
 void fw_devlink_drivers_done(void);
 void fw_devlink_probing_done(void);
 
diff --git a/drivers/base/core.c b/drivers/base/core.c
index d22d6b23e758..a54ec6df1058 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -287,7 +287,7 @@ static bool device_is_ancestor(struct device *dev, struct device *target)
 #define DL_MARKER_FLAGS		(DL_FLAG_INFERRED | \
 				 DL_FLAG_CYCLE | \
 				 DL_FLAG_MANAGED)
-static inline bool device_link_flag_is_sync_state_only(u32 flags)
+bool device_link_flag_is_sync_state_only(u32 flags)
 {
 	return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY;
 }
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index e3a7f25bf8d3..c97f8b139dc2 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -278,7 +278,8 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async)
 	 * walking.
 	 */
 	dev_for_each_link_to_supplier(link, dev)
-		if (READ_ONCE(link->status) != DL_STATE_DORMANT)
+		if (READ_ONCE(link->status) != DL_STATE_DORMANT &&
+		    !device_link_flag_is_sync_state_only(link->flags))
 			dpm_wait(link->supplier, async);
 
 	device_links_read_unlock(idx);
@@ -335,7 +336,8 @@ static void dpm_wait_for_consumers(struct device *dev, bool async)
 	 * unregistration).
 	 */
 	dev_for_each_link_to_consumer(link, dev)
-		if (READ_ONCE(link->status) != DL_STATE_DORMANT)
+		if (READ_ONCE(link->status) != DL_STATE_DORMANT &&
+		    !device_link_flag_is_sync_state_only(link->flags))
 			dpm_wait(link->consumer, async);
 
 	device_links_read_unlock(idx);
-- 
cgit v1.2.3


From 0efdedfa537eb534c251a5b4794caaf72cc55869 Mon Sep 17 00:00:00 2001
From: Donet Tom <donettom@linux.ibm.com>
Date: Thu, 18 Sep 2025 11:11:44 +0530
Subject: drivers/base/node: fix double free in register_one_node()

When device_register() fails in register_node(), it calls
put_device(&node->dev).  This triggers node_device_release(), which calls
kfree(to_node(dev)), thereby freeing the entire node structure.

As a result, when register_node() returns an error, the node memory has
already been freed.  Calling kfree(node) again in register_one_node()
leads to a double free.

This patch removes the redundant kfree(node) from register_one_node() to
prevent the double free.

Link: https://lkml.kernel.org/r/20250918054144.58980-1-donettom@linux.ibm.com
Fixes: 786eb990cfb7 ("drivers/base/node: handle error properly in register_one_node()")
Signed-off-by: Donet Tom <donettom@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Chris Mason <clm@meta.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Cc: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/base/node.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/base')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 45d512939c40..67b01d579737 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -887,7 +887,6 @@ int register_one_node(int nid)
 	error = register_node(node_devices[nid], nid);
 	if (error) {
 		node_devices[nid] = NULL;
-		kfree(node);
 		return error;
 	}
 
-- 
cgit v1.2.3


From 9a0abc39450a3123fd52533a662fbd37e0d1508c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 26 Sep 2025 17:47:14 +0200
Subject: PM: runtime: Add auto-cleanup macros for "resume and get" operations

It is generally useful to be able to automatically drop a device's
runtime PM usage counter incremented by runtime PM operations that
resume a device and bump up its usage counter [1].

To that end, add guard definition macros allowing pm_runtime_put()
and pm_runtime_put_autosuspend() to be used for the auto-cleanup in
those cases.

Simply put, a piece of code like below:

	pm_runtime_get_sync(dev);
	.....
	pm_runtime_put(dev);
	return 0;

can be transformed with guard() like:

	guard(pm_runtime_active)(dev);
	.....
	return 0;

(see the pm_runtime_put() call is gone).

However, it is better to do proper error handling in the majority of
cases, so doing something like this instead of the above is recommended:

	ACQUIRE(pm_runtime_active_try, pm)(dev);
	if (ACQUIRE_ERR(pm_runtime_active_try, &pm))
		return -ENXIO;
	.....
	return 0;

In all of the cases in which runtime PM is known to be enabled for the
given device or the device can be regarded as operational (and so it can
be accessed) with runtime PM disabled, a piece of code like:

	ret = pm_runtime_resume_and_get(dev);
	if (ret < 0)
		return ret;
	.....
	pm_runtime_put(dev);
	return 0;

can be changed as follows:

	ACQUIRE(pm_runtime_active_try, pm)(dev);
	ret = ACQUIRE_ERR(pm_runtime_active_try, &pm);
	if (ret < 0)
		return ret;
	.....
	return 0;

(again, see the pm_runtime_put() call is gone).

Still, if the device cannot be accessed unless runtime PM has been
enabled for it, the pm_runtime_active_try_enabled guard variant
needs to be used, that is (in the context of the example above):

	ACQUIRE(pm_runtime_active_try_enabled, pm)(dev);
	ret = ACQUIRE_ERR(pm_runtime_active_try_enabled, &pm);
	if (ret < 0)
		return ret;
	.....
	return 0;

When the original code calls pm_runtime_put_autosuspend(), use one
of the "auto" guard variants, pm_runtime_active_auto/_try/_enabled,
so for example, a piece of code like:

	ret = pm_runtime_resume_and_get(dev);
	if (ret < 0)
		return ret;
	.....
	pm_runtime_put_autosuspend(dev);
	return 0;

will become:

	ACQUIRE(pm_runtime_active_auto_try_enabled, pm)(dev);
	ret = ACQUIRE_ERR(pm_runtime_active_auto_try_enabled, &pm);
	if (ret < 0)
		return ret;
	.....
	return 0;

Note that the cases in which the return value of pm_runtime_get_sync()
is checked can also be handled with the help of the new guard macros.
For example, a piece of code like:

	ret = pm_runtime_get_sync(dev);
	if (ret < 0) {
		pm_runtime_put(dev);
		return ret;
	}
	.....
	pm_runtime_put(dev);
	return 0;

can be rewritten as:

	ACQUIRE(pm_runtime_active_auto_try_enabled, pm)(dev);
	ret = ACQUIRE_ERR(pm_runtime_active_auto_try_enabled, &pm);
	if (ret < 0)
		return ret;
	.....
	return 0;

or pm_runtime_get_active_try can be used if transparent handling of
disabled runtime PM is desirable.

Link: https://lore.kernel.org/linux-pm/878qimv24u.wl-tiwai@suse.de/ [1]
Link: https://lore.kernel.org/linux-pm/20250926150613.000073a4@huawei.com/
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/2238241.irdbgypaU6@rafael.j.wysocki
[ rjw: Fixed leftovers from the previous version in the changelog ]
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dhruva Gole <d-gole@ti.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c |  2 ++
 include/linux/pm_runtime.h   | 44 +++++++++++++++++++++++++++++++++++---------
 2 files changed, 37 insertions(+), 9 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index faa68bf9ef3d..0fc825066ede 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -799,6 +799,8 @@ static int rpm_resume(struct device *dev, int rpmflags)
 		if (dev->power.runtime_status == RPM_ACTIVE &&
 		    dev->power.last_status == RPM_ACTIVE)
 			retval = 1;
+		else if (rpmflags & RPM_TRANSPARENT)
+			goto out;
 		else
 			retval = -EACCES;
 	}
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index d1ff76e0e2d0..e5426bdd0c9f 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -21,6 +21,7 @@
 #define RPM_GET_PUT		0x04	/* Increment/decrement the
 					    usage_count */
 #define RPM_AUTO		0x08	/* Use autosuspend_delay */
+#define RPM_TRANSPARENT	0x10	/* Succeed if runtime PM is disabled */
 
 /*
  * Use this for defining a set of PM operations to be used in all situations
@@ -512,6 +513,19 @@ static inline int pm_runtime_get_sync(struct device *dev)
 	return __pm_runtime_resume(dev, RPM_GET_PUT);
 }
 
+static inline int pm_runtime_get_active(struct device *dev, int rpmflags)
+{
+	int ret;
+
+	ret = __pm_runtime_resume(dev, RPM_GET_PUT | rpmflags);
+	if (ret < 0) {
+		pm_runtime_put_noidle(dev);
+		return ret;
+	}
+
+	return 0;
+}
+
 /**
  * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it.
  * @dev: Target device.
@@ -522,15 +536,7 @@ static inline int pm_runtime_get_sync(struct device *dev)
  */
 static inline int pm_runtime_resume_and_get(struct device *dev)
 {
-	int ret;
-
-	ret = __pm_runtime_resume(dev, RPM_GET_PUT);
-	if (ret < 0) {
-		pm_runtime_put_noidle(dev);
-		return ret;
-	}
-
-	return 0;
+	return pm_runtime_get_active(dev, 0);
 }
 
 /**
@@ -610,6 +616,26 @@ static inline int pm_runtime_put_autosuspend(struct device *dev)
 	return __pm_runtime_put_autosuspend(dev);
 }
 
+DEFINE_GUARD(pm_runtime_active, struct device *,
+	     pm_runtime_get_sync(_T), pm_runtime_put(_T));
+DEFINE_GUARD(pm_runtime_active_auto, struct device *,
+	     pm_runtime_get_sync(_T), pm_runtime_put_autosuspend(_T));
+/*
+ * Use the following guards with ACQUIRE()/ACQUIRE_ERR().
+ *
+ * The difference between the "_try" and "_try_enabled" variants is that the
+ * former do not produce an error when runtime PM is disabled for the given
+ * device.
+ */
+DEFINE_GUARD_COND(pm_runtime_active, _try,
+		  pm_runtime_get_active(_T, RPM_TRANSPARENT))
+DEFINE_GUARD_COND(pm_runtime_active, _try_enabled,
+		  pm_runtime_resume_and_get(_T))
+DEFINE_GUARD_COND(pm_runtime_active_auto, _try,
+		  pm_runtime_get_active(_T, RPM_TRANSPARENT))
+DEFINE_GUARD_COND(pm_runtime_active_auto, _try_enabled,
+		  pm_runtime_resume_and_get(_T))
+
 /**
  * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0.
  * @dev: Target device.
-- 
cgit v1.2.3


From 92158fae2ed986f44347fc5b9a269830862c1529 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Fri, 3 Oct 2025 12:29:31 +0300
Subject: PM: runtime: Fix error checking for kunit_device_register()

The kunit_device_register() function never returns NULL, it returns
error pointers.  Update the assertions to use
KUNIT_ASSERT_NOT_ERR_OR_NULL() instead of checking for NULL.

Fixes: 7f7acd193ba8 ("PM: runtime: Add basic kunit tests for API contracts")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime-test.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/power/runtime-test.c b/drivers/base/power/runtime-test.c
index eca9885e807d..477feca804c7 100644
--- a/drivers/base/power/runtime-test.c
+++ b/drivers/base/power/runtime-test.c
@@ -14,7 +14,7 @@ static void pm_runtime_depth_test(struct kunit *test)
 {
 	struct device *dev = kunit_device_register(test, DEVICE_NAME);
 
-	KUNIT_ASSERT_PTR_NE(test, NULL, dev);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
 
 	pm_runtime_enable(dev);
 
@@ -32,7 +32,7 @@ static void pm_runtime_already_suspended_test(struct kunit *test)
 {
 	struct device *dev = kunit_device_register(test, DEVICE_NAME);
 
-	KUNIT_ASSERT_PTR_NE(test, NULL, dev);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
 
 	pm_runtime_enable(dev);
 	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
@@ -70,7 +70,7 @@ static void pm_runtime_idle_test(struct kunit *test)
 {
 	struct device *dev = kunit_device_register(test, DEVICE_NAME);
 
-	KUNIT_ASSERT_PTR_NE(test, NULL, dev);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
 
 	pm_runtime_enable(dev);
 
@@ -91,7 +91,7 @@ static void pm_runtime_disabled_test(struct kunit *test)
 {
 	struct device *dev = kunit_device_register(test, DEVICE_NAME);
 
-	KUNIT_ASSERT_PTR_NE(test, NULL, dev);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
 
 	/* Never called pm_runtime_enable() */
 	KUNIT_EXPECT_FALSE(test, pm_runtime_enabled(dev));
@@ -131,7 +131,7 @@ static void pm_runtime_error_test(struct kunit *test)
 {
 	struct device *dev = kunit_device_register(test, DEVICE_NAME);
 
-	KUNIT_ASSERT_PTR_NE(test, NULL, dev);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
 
 	pm_runtime_enable(dev);
 	KUNIT_EXPECT_TRUE(test, pm_runtime_suspended(dev));
@@ -214,7 +214,7 @@ static void pm_runtime_probe_active_test(struct kunit *test)
 {
 	struct device *dev = kunit_device_register(test, DEVICE_NAME);
 
-	KUNIT_ASSERT_PTR_NE(test, NULL, dev);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
 
 	KUNIT_EXPECT_TRUE(test, pm_runtime_status_suspended(dev));
 
-- 
cgit v1.2.3


From c6a809363a66b8ff0f6a000b5f09408a1b33eeb5 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@kernel.org>
Date: Tue, 29 Jul 2025 08:46:34 +0200
Subject: drivers/base/memory: add node id parameter to add_memory_block()

Patch series "mm/memory_hotplug: fixup crash during uevent handling", v4.

we have some udev rules trying to read the sysfs attribute 'valid_zones'
during an memory 'add' event, causing a crash in zone_for_pfn_range().
Debugging found that mem->nid was set to NUMA_NO_NODE, which crashed in
NODE_DATA(nid).  Further analysis revealed that we're running into a race
with udev event processing: add_memory_resource() has this function calls:

1) __try_online_node()
2) arch_add_memory()
3) create_memory_block_devices()
  -> calls device_register() -> memory 'add' event
4) node_set_online()/__register_one_node()
  -> calls device_register() -> node 'add' event
5) register_memory_blocks_under_node()
  -> sets mem->nid

Which, to the uninitated, is ... weird ...

Why do we try to online the node in 1), but only register the node in 4)
_after_ we have created the memory blocks in 3) ?  And why do we set the
'nid' value in 5), when the uevent (which might need to see the correct
'nid' value) is sent out in 3) ?  There must be a reason, I'm sure ...

So here's a small patchset to fixup uevent ordering.  The first patch adds
a 'nid' parameter to add_memory_blocks() (to avoid mem->nid being
initialized with NUMA_NO_NODE), and the second patch reshuffles the code
in add_memory_resource() to fully initialize the node prior to calling
create_memory_block_devices() so that the node is valid at that time and
uevent processing will see correct values in sysfs.


This patch (of 3):

We have some udev rules trying to read the sysfs attribute 'valid_zones'
during an memory 'add' event, causing a crash in zone_for_pfn_range().
Debugging found that mem->nid was set to NUMA_NO_NODE, which crashed in
NODE_DATA(nid).  Further analysis revealed that we're running into a race
with udev event processing: add_memory_resource() has this function calls:

1) __try_online_node()
2) arch_add_memory()
3) create_memory_block_devices()
  -> calls device_register() -> memory 'add' event
4) node_set_online()/__register_one_node()
  -> calls device_register() -> node 'add' event
5) register_memory_blocks_under_node()
  -> sets mem->nid

Which, to the uninitated, is ... weird ...

Why do we try to online the node in 1), but only register the node in 4)
_after_ we have created the memory blocks in 3) ?  And why do we set the
'nid' value in 5), when the uevent (which might need to see the correct
'nid' value) is sent out in 3) ?  There must be a reason, I'm sure ...

So here's a small patchset to fixup uevent ordering.  The first patch adds
a 'nid' parameter to add_memory_blocks() (to avoid mem->nid being
initialized with NUMA_NO_NODE), and the second patch reshuffles the code
in add_memory_resource() to fully initialize the node prior to calling
create_memory_block_devices() so that the node is valid at that time and
uevent processing will see correct values in sysfs.


This patch (of 3):

Add a 'nid' parameter to add_memory_block() to initialize the memory block
with the correct node id.

Link: https://lkml.kernel.org/r/20250729064637.51662-1-hare@kernel.org
Link: https://lkml.kernel.org/r/20250729064637.51662-2-hare@kernel.org
Signed-off-by: Hannes Reinecke <hare@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Donet Tom <donettom@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/base/memory.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 5c6c1d6bb59f..894d3891292b 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -809,7 +809,7 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
 }
 #endif
 
-static int add_memory_block(unsigned long block_id, unsigned long state,
+static int add_memory_block(unsigned long block_id, int nid, unsigned long state,
 			    struct vmem_altmap *altmap,
 			    struct memory_group *group)
 {
@@ -827,7 +827,7 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
 
 	mem->start_section_nr = block_id * sections_per_block;
 	mem->state = state;
-	mem->nid = NUMA_NO_NODE;
+	mem->nid = nid;
 	mem->altmap = altmap;
 	INIT_LIST_HEAD(&mem->group_next);
 
@@ -854,13 +854,6 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
 	return 0;
 }
 
-static int add_hotplug_memory_block(unsigned long block_id,
-				    struct vmem_altmap *altmap,
-				    struct memory_group *group)
-{
-	return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
-}
-
 static void remove_memory_block(struct memory_block *memory)
 {
 	if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
@@ -900,7 +893,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
 		return -EINVAL;
 
 	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
-		ret = add_hotplug_memory_block(block_id, altmap, group);
+		ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_OFFLINE, altmap, group);
 		if (ret)
 			break;
 	}
@@ -1005,7 +998,7 @@ void __init memory_dev_init(void)
 			continue;
 
 		block_id = memory_block_id(nr);
-		ret = add_memory_block(block_id, MEM_ONLINE, NULL, NULL);
+		ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_ONLINE, NULL, NULL);
 		if (ret) {
 			panic("%s() failed to add memory block: %d\n",
 			      __func__, ret);
-- 
cgit v1.2.3


From b8179af120943e2fc099ea87caa234039a709a66 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@kernel.org>
Date: Tue, 29 Jul 2025 08:46:35 +0200
Subject: mm/memory_hotplug: activate node before adding new memory blocks

The sysfs attributes for memory blocks require the node ID to be set and
initialized, so move the node activation before adding new memory blocks.
This also has the nice side effect that the BUG_ON() can be converted into
a WARN_ON() as we now can handle registration errors.

Link: https://lkml.kernel.org/r/20250729064637.51662-3-hare@kernel.org
Fixes: b9ff036082cd ("mm/memory_hotplug.c: make add_memory_resource use __try_online_node")
Signed-off-by: Hannes Reinecke <hare@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Donet Tom <donettom@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/base/memory.c  |  4 ++--
 include/linux/memory.h |  2 +-
 mm/memory_hotplug.c    | 32 +++++++++++++++++---------------
 3 files changed, 20 insertions(+), 18 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 894d3891292b..fb212a889e65 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -879,7 +879,7 @@ static void remove_memory_block(struct memory_block *memory)
  * Called under device_hotplug_lock.
  */
 int create_memory_block_devices(unsigned long start, unsigned long size,
-				struct vmem_altmap *altmap,
+				int nid, struct vmem_altmap *altmap,
 				struct memory_group *group)
 {
 	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
@@ -893,7 +893,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
 		return -EINVAL;
 
 	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
-		ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_OFFLINE, altmap, group);
+		ret = add_memory_block(block_id, nid, MEM_OFFLINE, altmap, group);
 		if (ret)
 			break;
 	}
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 40eb70ccb09d..4a29153e372e 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -159,7 +159,7 @@ static inline unsigned long memory_block_advised_max_size(void)
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
 int create_memory_block_devices(unsigned long start, unsigned long size,
-				struct vmem_altmap *altmap,
+				int nid, struct vmem_altmap *altmap,
 				struct memory_group *group);
 void remove_memory_block_devices(unsigned long start, unsigned long size);
 extern void memory_dev_init(void);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e9f14de4a9c9..0be83039c3b5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1477,7 +1477,7 @@ static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group,
 		}
 
 		/* create memory block devices after memory was added */
-		ret = create_memory_block_devices(cur_start, memblock_size,
+		ret = create_memory_block_devices(cur_start, memblock_size, nid,
 						  params.altmap, group);
 		if (ret) {
 			arch_remove_memory(cur_start, memblock_size, NULL);
@@ -1539,8 +1539,16 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 
 	ret = __try_online_node(nid, false);
 	if (ret < 0)
-		goto error;
-	new_node = ret;
+		goto error_memblock_remove;
+	if (ret) {
+		node_set_online(nid);
+		ret = register_one_node(nid);
+		if (WARN_ON(ret)) {
+			node_set_offline(nid);
+			goto error_memblock_remove;
+		}
+		new_node = true;
+	}
 
 	/*
 	 * Self hosted memmap array
@@ -1556,24 +1564,13 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 			goto error;
 
 		/* create memory block devices after memory was added */
-		ret = create_memory_block_devices(start, size, NULL, group);
+		ret = create_memory_block_devices(start, size, nid, NULL, group);
 		if (ret) {
 			arch_remove_memory(start, size, params.altmap);
 			goto error;
 		}
 	}
 
-	if (new_node) {
-		/* If sysfs file of new node can't be created, cpu on the node
-		 * can't be hot-added. There is no rollback way now.
-		 * So, check by BUG_ON() to catch it reluctantly..
-		 * We online node here. We can't roll back from here.
-		 */
-		node_set_online(nid);
-		ret = register_one_node(nid);
-		BUG_ON(ret);
-	}
-
 	register_memory_blocks_under_node_hotplug(nid, PFN_DOWN(start),
 					  PFN_UP(start + size - 1));
 
@@ -1597,6 +1594,11 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 
 	return ret;
 error:
+	if (new_node) {
+		node_set_offline(nid);
+		unregister_one_node(nid);
+	}
+error_memblock_remove:
 	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
 		memblock_remove(start, size);
 error_mem_hotplug_end:
-- 
cgit v1.2.3


From 0a947c14e48cbf9de222836170282e0167a9e096 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@kernel.org>
Date: Tue, 29 Jul 2025 08:46:36 +0200
Subject: drivers/base: move memory_block_add_nid() into the caller

Now the node id only needs to be set for early memory, so move
memory_block_add_nid() into the caller and rename it into
memory_block_add_nid_early().  This allows us to further simplify the code
by dropping the 'context' argument to
do_register_memory_block_under_node().

Link: https://lkml.kernel.org/r/20250729064637.51662-4-hare@kernel.org
Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Hannes Reinecke <hare@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Donet Tom <donettom@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/base/memory.c  | 36 ++++++++++++++++++------------------
 drivers/base/node.c    | 10 ++++------
 include/linux/memory.h |  3 +--
 3 files changed, 23 insertions(+), 26 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index fb212a889e65..6d84a02cfa5d 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -769,21 +769,22 @@ static struct zone *early_node_zone_for_memory_block(struct memory_block *mem,
 
 #ifdef CONFIG_NUMA
 /**
- * memory_block_add_nid() - Indicate that system RAM falling into this memory
- *			    block device (partially) belongs to the given node.
+ * memory_block_add_nid_early() - Indicate that early system RAM falling into
+ *				  this memory block device (partially) belongs
+ *				  to the given node.
  * @mem: The memory block device.
  * @nid: The node id.
- * @context: The memory initialization context.
  *
- * Indicate that system RAM falling into this memory block (partially) belongs
- * to the given node. If the context indicates ("early") that we are adding the
- * node during node device subsystem initialization, this will also properly
- * set/adjust mem->zone based on the zone ranges of the given node.
+ * Indicate that early system RAM falling into this memory block (partially)
+ * belongs to the given node. This will also properly set/adjust mem->zone based
+ * on the zone ranges of the given node.
+ *
+ * Memory hotplug handles this on memory block creation, where we can only have
+ * a single nid span a memory block.
  */
-void memory_block_add_nid(struct memory_block *mem, int nid,
-			  enum meminit_context context)
+void memory_block_add_nid_early(struct memory_block *mem, int nid)
 {
-	if (context == MEMINIT_EARLY && mem->nid != nid) {
+	if (mem->nid != nid) {
 		/*
 		 * For early memory we have to determine the zone when setting
 		 * the node id and handle multiple nodes spanning a single
@@ -797,15 +798,14 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
 			mem->zone = early_node_zone_for_memory_block(mem, nid);
 		else
 			mem->zone = NULL;
+		/*
+		 * If this memory block spans multiple nodes, we only indicate
+		 * the last processed node. If we span multiple nodes (not applicable
+		 * to hotplugged memory), zone == NULL will prohibit memory offlining
+		 * and consequently unplug.
+		 */
+		mem->nid = nid;
 	}
-
-	/*
-	 * If this memory block spans multiple nodes, we only indicate
-	 * the last processed node. If we span multiple nodes (not applicable
-	 * to hotplugged memory), zone == NULL will prohibit memory offlining
-	 * and consequently unplug.
-	 */
-	mem->nid = nid;
 }
 #endif
 
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 67b01d579737..6b6e55a98b79 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -781,13 +781,10 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 static void do_register_memory_block_under_node(int nid,
-						struct memory_block *mem_blk,
-						enum meminit_context context)
+						struct memory_block *mem_blk)
 {
 	int ret;
 
-	memory_block_add_nid(mem_blk, nid, context);
-
 	ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
 				       &mem_blk->dev.kobj,
 				       kobject_name(&mem_blk->dev.kobj));
@@ -815,7 +812,7 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
 {
 	int nid = *(int *)arg;
 
-	do_register_memory_block_under_node(nid, mem_blk, MEMINIT_HOTPLUG);
+	do_register_memory_block_under_node(nid, mem_blk);
 	return 0;
 }
 
@@ -855,7 +852,8 @@ static void register_memory_blocks_under_nodes(void)
 			if (!mem)
 				continue;
 
-			do_register_memory_block_under_node(nid, mem, MEMINIT_EARLY);
+			memory_block_add_nid_early(mem, nid);
+			do_register_memory_block_under_node(nid, mem);
 			put_device(&mem->dev);
 		}
 
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 4a29153e372e..43d378038ce2 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -202,8 +202,7 @@ static inline unsigned long phys_to_block_id(unsigned long phys)
 }
 
 #ifdef CONFIG_NUMA
-void memory_block_add_nid(struct memory_block *mem, int nid,
-			  enum meminit_context context);
+void memory_block_add_nid_early(struct memory_block *mem, int nid);
 #endif /* CONFIG_NUMA */
 int memory_block_advise_max_size(unsigned long size);
 unsigned long memory_block_advised_max_size(void);
-- 
cgit v1.2.3


From 74b84d1be0220b99405c16a4a3e1e503e3bd8387 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 7 Oct 2025 11:43:12 +0200
Subject: driver core: fw_devlink: Don't warn about sync_state() pending

Due to the wider deployment of the ->sync_state() support, for PM domains
for example, we are receiving reports about the sync_state() pending
message that is being logged in fw_devlink_dev_sync_state(). In particular
as it's printed at the warning level, which is questionable.

Even if it certainly is useful to know that the ->sync_state() condition
could not be met, there may be nothing wrong with it. For example, a driver
may be built as module and are still waiting to be initialized/probed. For
this reason let's move to the info level for now.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reported-by: Sebin Francis <sebin.francis@ti.com>
Reported-by: Diederik de Haas <didi.debian@cknow.org>
Reported-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Dhruva Gole <d-gole@ti.com>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Acked-by: Saravana Kannan <saravanak@google.com>
Reviewed-by: Sebin Francis <sebin.francis@ti.com>
Tested-by: Sebin Francis <sebin.francis@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/base')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 3c533dab8fa5..f69dc9c85954 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1784,7 +1784,7 @@ static int fw_devlink_dev_sync_state(struct device *dev, void *data)
 		return 0;
 
 	if (fw_devlink_sync_state == FW_DEVLINK_SYNC_STATE_STRICT) {
-		dev_warn(sup, "sync_state() pending due to %s\n",
+		dev_info(sup, "sync_state() pending due to %s\n",
 			 dev_name(link->consumer));
 		return 0;
 	}
-- 
cgit v1.2.3


From a91c8096590bd7801a26454789f2992094fe36da Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <dev@lankhorst.se>
Date: Wed, 23 Jul 2025 16:24:16 +0200
Subject: devcoredump: Fix circular locking dependency with devcd->mutex.

The original code causes a circular locking dependency found by lockdep.

======================================================
WARNING: possible circular locking dependency detected
6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 Tainted: G S   U
------------------------------------------------------
xe_fault_inject/5091 is trying to acquire lock:
ffff888156815688 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}, at: __flush_work+0x25d/0x660

but task is already holding lock:

ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (&devcd->mutex){+.+.}-{3:3}:
       mutex_lock_nested+0x4e/0xc0
       devcd_data_write+0x27/0x90
       sysfs_kf_bin_write+0x80/0xf0
       kernfs_fop_write_iter+0x169/0x220
       vfs_write+0x293/0x560
       ksys_write+0x72/0xf0
       __x64_sys_write+0x19/0x30
       x64_sys_call+0x2bf/0x2660
       do_syscall_64+0x93/0xb60
       entry_SYSCALL_64_after_hwframe+0x76/0x7e
-> #1 (kn->active#236){++++}-{0:0}:
       kernfs_drain+0x1e2/0x200
       __kernfs_remove+0xae/0x400
       kernfs_remove_by_name_ns+0x5d/0xc0
       remove_files+0x54/0x70
       sysfs_remove_group+0x3d/0xa0
       sysfs_remove_groups+0x2e/0x60
       device_remove_attrs+0xc7/0x100
       device_del+0x15d/0x3b0
       devcd_del+0x19/0x30
       process_one_work+0x22b/0x6f0
       worker_thread+0x1e8/0x3d0
       kthread+0x11c/0x250
       ret_from_fork+0x26c/0x2e0
       ret_from_fork_asm+0x1a/0x30
-> #0 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}:
       __lock_acquire+0x1661/0x2860
       lock_acquire+0xc4/0x2f0
       __flush_work+0x27a/0x660
       flush_delayed_work+0x5d/0xa0
       dev_coredump_put+0x63/0xa0
       xe_driver_devcoredump_fini+0x12/0x20 [xe]
       devm_action_release+0x12/0x30
       release_nodes+0x3a/0x120
       devres_release_all+0x8a/0xd0
       device_unbind_cleanup+0x12/0x80
       device_release_driver_internal+0x23a/0x280
       device_driver_detach+0x14/0x20
       unbind_store+0xaf/0xc0
       drv_attr_store+0x21/0x50
       sysfs_kf_write+0x4a/0x80
       kernfs_fop_write_iter+0x169/0x220
       vfs_write+0x293/0x560
       ksys_write+0x72/0xf0
       __x64_sys_write+0x19/0x30
       x64_sys_call+0x2bf/0x2660
       do_syscall_64+0x93/0xb60
       entry_SYSCALL_64_after_hwframe+0x76/0x7e
other info that might help us debug this:
Chain exists of: (work_completion)(&(&devcd->del_wk)->work) --> kn->active#236 --> &devcd->mutex
 Possible unsafe locking scenario:
       CPU0                    CPU1
       ----                    ----
  lock(&devcd->mutex);
                               lock(kn->active#236);
                               lock(&devcd->mutex);
  lock((work_completion)(&(&devcd->del_wk)->work));
 *** DEADLOCK ***
5 locks held by xe_fault_inject/5091:
 #0: ffff8881129f9488 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x72/0xf0
 #1: ffff88810c755078 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x123/0x220
 #2: ffff8881054811a0 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x55/0x280
 #3: ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0
 #4: ffffffff8359e020 (rcu_read_lock){....}-{1:2}, at: __flush_work+0x72/0x660
stack backtrace:
CPU: 14 UID: 0 PID: 5091 Comm: xe_fault_inject Tainted: G S   U              6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 PREEMPT_{RT,(lazy)}
Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER
Hardware name: Micro-Star International Co., Ltd. MS-7D25/PRO Z690-A DDR4(MS-7D25), BIOS 1.10 12/13/2021
Call Trace:
 <TASK>
 dump_stack_lvl+0x91/0xf0
 dump_stack+0x10/0x20
 print_circular_bug+0x285/0x360
 check_noncircular+0x135/0x150
 ? register_lock_class+0x48/0x4a0
 __lock_acquire+0x1661/0x2860
 lock_acquire+0xc4/0x2f0
 ? __flush_work+0x25d/0x660
 ? mark_held_locks+0x46/0x90
 ? __flush_work+0x25d/0x660
 __flush_work+0x27a/0x660
 ? __flush_work+0x25d/0x660
 ? trace_hardirqs_on+0x1e/0xd0
 ? __pfx_wq_barrier_func+0x10/0x10
 flush_delayed_work+0x5d/0xa0
 dev_coredump_put+0x63/0xa0
 xe_driver_devcoredump_fini+0x12/0x20 [xe]
 devm_action_release+0x12/0x30
 release_nodes+0x3a/0x120
 devres_release_all+0x8a/0xd0
 device_unbind_cleanup+0x12/0x80
 device_release_driver_internal+0x23a/0x280
 ? bus_find_device+0xa8/0xe0
 device_driver_detach+0x14/0x20
 unbind_store+0xaf/0xc0
 drv_attr_store+0x21/0x50
 sysfs_kf_write+0x4a/0x80
 kernfs_fop_write_iter+0x169/0x220
 vfs_write+0x293/0x560
 ksys_write+0x72/0xf0
 __x64_sys_write+0x19/0x30
 x64_sys_call+0x2bf/0x2660
 do_syscall_64+0x93/0xb60
 ? __f_unlock_pos+0x15/0x20
 ? __x64_sys_getdents64+0x9b/0x130
 ? __pfx_filldir64+0x10/0x10
 ? do_syscall_64+0x1a2/0xb60
 ? clear_bhb_loop+0x30/0x80
 ? clear_bhb_loop+0x30/0x80
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x76e292edd574
Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d d5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89
RSP: 002b:00007fffe247a828 EFLAGS: 00000202 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 000076e292edd574
RDX: 000000000000000c RSI: 00006267f6306063 RDI: 000000000000000b
RBP: 000000000000000c R08: 000076e292fc4b20 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000202 R12: 00006267f6306063
R13: 000000000000000b R14: 00006267e6859c00 R15: 000076e29322a000
 </TASK>
xe 0000:03:00.0: [drm] Xe device coredump has been deleted.

Fixes: 01daccf74832 ("devcoredump : Serialize devcd_del work")
Cc: Mukesh Ojha <quic_mojha@quicinc.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org # v6.1+
Signed-off-by: Maarten Lankhorst <dev@lankhorst.se>
Cc: Matthew Brost <matthew.brost@intel.com>
Acked-by: Mukesh Ojha <mukesh.ojha@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20250723142416.1020423-1-dev@lankhorst.se
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/devcoredump.c | 136 +++++++++++++++++++++++++++------------------
 1 file changed, 83 insertions(+), 53 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
index 37faf6156d7c..55bdc7f5e59d 100644
--- a/drivers/base/devcoredump.c
+++ b/drivers/base/devcoredump.c
@@ -23,50 +23,46 @@ struct devcd_entry {
 	void *data;
 	size_t datalen;
 	/*
-	 * Here, mutex is required to serialize the calls to del_wk work between
-	 * user/kernel space which happens when devcd is added with device_add()
-	 * and that sends uevent to user space. User space reads the uevents,
-	 * and calls to devcd_data_write() which try to modify the work which is
-	 * not even initialized/queued from devcoredump.
+	 * There are 2 races for which mutex is required.
 	 *
+	 * The first race is between device creation and userspace writing to
+	 * schedule immediately destruction.
 	 *
+	 * This race is handled by arming the timer before device creation, but
+	 * when device creation fails the timer still exists.
 	 *
-	 *        cpu0(X)                                 cpu1(Y)
+	 * To solve this, hold the mutex during device_add(), and set
+	 * init_completed on success before releasing the mutex.
 	 *
-	 *        dev_coredump() uevent sent to user space
-	 *        device_add()  ======================> user space process Y reads the
-	 *                                              uevents writes to devcd fd
-	 *                                              which results into writes to
+	 * That way the timer will never fire until device_add() is called,
+	 * it will do nothing if init_completed is not set. The timer is also
+	 * cancelled in that case.
 	 *
-	 *                                             devcd_data_write()
-	 *                                               mod_delayed_work()
-	 *                                                 try_to_grab_pending()
-	 *                                                   timer_delete()
-	 *                                                     debug_assert_init()
-	 *       INIT_DELAYED_WORK()
-	 *       schedule_delayed_work()
-	 *
-	 *
-	 * Also, mutex alone would not be enough to avoid scheduling of
-	 * del_wk work after it get flush from a call to devcd_free()
-	 * mentioned as below.
-	 *
-	 *	disabled_store()
-	 *        devcd_free()
-	 *          mutex_lock()             devcd_data_write()
-	 *          flush_delayed_work()
-	 *          mutex_unlock()
-	 *                                   mutex_lock()
-	 *                                   mod_delayed_work()
-	 *                                   mutex_unlock()
-	 * So, delete_work flag is required.
+	 * The second race involves multiple parallel invocations of devcd_free(),
+	 * add a deleted flag so only 1 can call the destructor.
 	 */
 	struct mutex mutex;
-	bool delete_work;
+	bool init_completed, deleted;
 	struct module *owner;
 	ssize_t (*read)(char *buffer, loff_t offset, size_t count,
 			void *data, size_t datalen);
 	void (*free)(void *data);
+	/*
+	 * If nothing interferes and device_add() was returns success,
+	 * del_wk will destroy the device after the timer fires.
+	 *
+	 * Multiple userspace processes can interfere in the working of the timer:
+	 * - Writing to the coredump will reschedule the timer to run immediately,
+	 *   if still armed.
+	 *
+	 *   This is handled by using "if (cancel_delayed_work()) {
+	 *   schedule_delayed_work() }", to prevent re-arming after having
+	 *   been previously fired.
+	 * - Writing to /sys/class/devcoredump/disabled will destroy the
+	 *   coredump synchronously.
+	 *   This is handled by using disable_delayed_work_sync(), and then
+	 *   checking if deleted flag is set with &devcd->mutex held.
+	 */
 	struct delayed_work del_wk;
 	struct device *failing_dev;
 };
@@ -95,14 +91,27 @@ static void devcd_dev_release(struct device *dev)
 	kfree(devcd);
 }
 
+static void __devcd_del(struct devcd_entry *devcd)
+{
+	devcd->deleted = true;
+	device_del(&devcd->devcd_dev);
+	put_device(&devcd->devcd_dev);
+}
+
 static void devcd_del(struct work_struct *wk)
 {
 	struct devcd_entry *devcd;
+	bool init_completed;
 
 	devcd = container_of(wk, struct devcd_entry, del_wk.work);
 
-	device_del(&devcd->devcd_dev);
-	put_device(&devcd->devcd_dev);
+	/* devcd->mutex serializes against dev_coredumpm_timeout */
+	mutex_lock(&devcd->mutex);
+	init_completed = devcd->init_completed;
+	mutex_unlock(&devcd->mutex);
+
+	if (init_completed)
+		__devcd_del(devcd);
 }
 
 static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,
@@ -122,12 +131,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
 	struct device *dev = kobj_to_dev(kobj);
 	struct devcd_entry *devcd = dev_to_devcd(dev);
 
-	mutex_lock(&devcd->mutex);
-	if (!devcd->delete_work) {
-		devcd->delete_work = true;
-		mod_delayed_work(system_wq, &devcd->del_wk, 0);
-	}
-	mutex_unlock(&devcd->mutex);
+	/*
+	 * Although it's tempting to use mod_delayed work here,
+	 * that will cause a reschedule if the timer already fired.
+	 */
+	if (cancel_delayed_work(&devcd->del_wk))
+		schedule_delayed_work(&devcd->del_wk, 0);
 
 	return count;
 }
@@ -151,11 +160,21 @@ static int devcd_free(struct device *dev, void *data)
 {
 	struct devcd_entry *devcd = dev_to_devcd(dev);
 
+	/*
+	 * To prevent a race with devcd_data_write(), disable work and
+	 * complete manually instead.
+	 *
+	 * We cannot rely on the return value of
+	 * disable_delayed_work_sync() here, because it might be in the
+	 * middle of a cancel_delayed_work + schedule_delayed_work pair.
+	 *
+	 * devcd->mutex here guards against multiple parallel invocations
+	 * of devcd_free().
+	 */
+	disable_delayed_work_sync(&devcd->del_wk);
 	mutex_lock(&devcd->mutex);
-	if (!devcd->delete_work)
-		devcd->delete_work = true;
-
-	flush_delayed_work(&devcd->del_wk);
+	if (!devcd->deleted)
+		__devcd_del(devcd);
 	mutex_unlock(&devcd->mutex);
 	return 0;
 }
@@ -179,12 +198,10 @@ static ssize_t disabled_show(const struct class *class, const struct class_attri
  *                                                                 put_device() <- last reference
  *             error = fn(dev, data)                           devcd_dev_release()
  *             devcd_free(dev, data)                           kfree(devcd)
- *             mutex_lock(&devcd->mutex);
  *
  *
  * In the above diagram, it looks like disabled_store() would be racing with parallelly
- * running devcd_del() and result in memory abort while acquiring devcd->mutex which
- * is called after kfree of devcd memory after dropping its last reference with
+ * running devcd_del() and result in memory abort after dropping its last reference with
  * put_device(). However, this will not happens as fn(dev, data) runs
  * with its own reference to device via klist_node so it is not its last reference.
  * so, above situation would not occur.
@@ -374,7 +391,7 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner,
 	devcd->read = read;
 	devcd->free = free;
 	devcd->failing_dev = get_device(dev);
-	devcd->delete_work = false;
+	devcd->deleted = false;
 
 	mutex_init(&devcd->mutex);
 	device_initialize(&devcd->devcd_dev);
@@ -383,8 +400,14 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner,
 		     atomic_inc_return(&devcd_count));
 	devcd->devcd_dev.class = &devcd_class;
 
-	mutex_lock(&devcd->mutex);
 	dev_set_uevent_suppress(&devcd->devcd_dev, true);
+
+	/* devcd->mutex prevents devcd_del() completing until init finishes */
+	mutex_lock(&devcd->mutex);
+	devcd->init_completed = false;
+	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+	schedule_delayed_work(&devcd->del_wk, timeout);
+
 	if (device_add(&devcd->devcd_dev))
 		goto put_device;
 
@@ -401,13 +424,20 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner,
 
 	dev_set_uevent_suppress(&devcd->devcd_dev, false);
 	kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
-	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
-	schedule_delayed_work(&devcd->del_wk, timeout);
+
+	/*
+	 * Safe to run devcd_del() now that we are done with devcd_dev.
+	 * Alternatively we could have taken a ref on devcd_dev before
+	 * dropping the lock.
+	 */
+	devcd->init_completed = true;
 	mutex_unlock(&devcd->mutex);
 	return;
  put_device:
-	put_device(&devcd->devcd_dev);
 	mutex_unlock(&devcd->mutex);
+	cancel_delayed_work_sync(&devcd->del_wk);
+	put_device(&devcd->devcd_dev);
+
  put_module:
 	module_put(owner);
  free:
-- 
cgit v1.2.3


From 2eead19334516c8e9927c11b448fbe512b1f18a1 Mon Sep 17 00:00:00 2001
From: Kaushlendra Kumar <kaushlendra.kumar@intel.com>
Date: Tue, 23 Sep 2025 23:13:08 +0530
Subject: arch_topology: Fix incorrect error check in
 topology_parse_cpu_capacity()

Fix incorrect use of PTR_ERR_OR_ZERO() in topology_parse_cpu_capacity()
which causes the code to proceed with NULL clock pointers. The current
logic uses !PTR_ERR_OR_ZERO(cpu_clk) which evaluates to true for both
valid pointers and NULL, leading to potential NULL pointer dereference
in clk_get_rate().

Per include/linux/err.h documentation, PTR_ERR_OR_ZERO(ptr) returns:
"The error code within @ptr if it is an error pointer; 0 otherwise."

This means PTR_ERR_OR_ZERO() returns 0 for both valid pointers AND NULL
pointers. Therefore !PTR_ERR_OR_ZERO(cpu_clk) evaluates to true (proceed)
when cpu_clk is either valid or NULL, causing clk_get_rate(NULL) to be
called when of_clk_get() returns NULL.

Replace with !IS_ERR_OR_NULL(cpu_clk) which only proceeds for valid
pointers, preventing potential NULL pointer dereference in clk_get_rate().

Cc: stable <stable@kernel.org>
Signed-off-by: Kaushlendra Kumar <kaushlendra.kumar@intel.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Fixes: b8fe128dad8f ("arch_topology: Adjust initial CPU capacities with current freq")
Link: https://patch.msgid.link/20250923174308.1771906-1-kaushlendra.kumar@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/arch_topology.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/base')

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 1037169abb45..e1eff05bea4a 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -292,7 +292,7 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
 		 * frequency (by keeping the initial capacity_freq_ref value).
 		 */
 		cpu_clk = of_clk_get(cpu_node, 0);
-		if (!PTR_ERR_OR_ZERO(cpu_clk)) {
+		if (!IS_ERR_OR_NULL(cpu_clk)) {
 			per_cpu(capacity_freq_ref, cpu) =
 				clk_get_rate(cpu_clk) / HZ_PER_KHZ;
 			clk_put(cpu_clk);
-- 
cgit v1.2.3


From 434f7349a1f00618a620b316f091bd13a12bc8d2 Mon Sep 17 00:00:00 2001
From: Alexey Klimov <alexey.klimov@linaro.org>
Date: Wed, 22 Oct 2025 21:10:12 +0100
Subject: regmap: slimbus: fix bus_context pointer in regmap init calls

Commit 4e65bda8273c ("ASoC: wcd934x: fix error handling in
wcd934x_codec_parse_data()") revealed the problem in the slimbus regmap.
That commit breaks audio playback, for instance, on sdm845 Thundercomm
Dragonboard 845c board:

 Unable to handle kernel paging request at virtual address ffff8000847cbad4
 ...
 CPU: 5 UID: 0 PID: 776 Comm: aplay Not tainted 6.18.0-rc1-00028-g7ea30958b305 #11 PREEMPT
 Hardware name: Thundercomm Dragonboard 845c (DT)
 ...
 Call trace:
  slim_xfer_msg+0x24/0x1ac [slimbus] (P)
  slim_read+0x48/0x74 [slimbus]
  regmap_slimbus_read+0x18/0x24 [regmap_slimbus]
  _regmap_raw_read+0xe8/0x174
  _regmap_bus_read+0x44/0x80
  _regmap_read+0x60/0xd8
  _regmap_update_bits+0xf4/0x140
  _regmap_select_page+0xa8/0x124
  _regmap_raw_write_impl+0x3b8/0x65c
  _regmap_bus_raw_write+0x60/0x80
  _regmap_write+0x58/0xc0
  regmap_write+0x4c/0x80
  wcd934x_hw_params+0x494/0x8b8 [snd_soc_wcd934x]
  snd_soc_dai_hw_params+0x3c/0x7c [snd_soc_core]
  __soc_pcm_hw_params+0x22c/0x634 [snd_soc_core]
  dpcm_be_dai_hw_params+0x1d4/0x38c [snd_soc_core]
  dpcm_fe_dai_hw_params+0x9c/0x17c [snd_soc_core]
  snd_pcm_hw_params+0x124/0x464 [snd_pcm]
  snd_pcm_common_ioctl+0x110c/0x1820 [snd_pcm]
  snd_pcm_ioctl+0x34/0x4c [snd_pcm]
  __arm64_sys_ioctl+0xac/0x104
  invoke_syscall+0x48/0x104
  el0_svc_common.constprop.0+0x40/0xe0
  do_el0_svc+0x1c/0x28
  el0_svc+0x34/0xec
  el0t_64_sync_handler+0xa0/0xf0
  el0t_64_sync+0x198/0x19c

The __devm_regmap_init_slimbus() started to be used instead of
__regmap_init_slimbus() after the commit mentioned above and turns out
the incorrect bus_context pointer (3rd argument) was used in
__devm_regmap_init_slimbus(). It should be just "slimbus" (which is equal
to &slimbus->dev). Correct it. The wcd934x codec seems to be the only or
the first user of devm_regmap_init_slimbus() but we should fix it till
the point where __devm_regmap_init_slimbus() was introduced therefore
two "Fixes" tags.

While at this, also correct the same argument in __regmap_init_slimbus().

Fixes: 4e65bda8273c ("ASoC: wcd934x: fix error handling in wcd934x_codec_parse_data()")
Fixes: 7d6f7fb053ad ("regmap: add SLIMbus support")
Cc: stable@vger.kernel.org
Cc: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Cc: Ma Ke <make24@iscas.ac.cn>
Cc: Steev Klimaszewski <steev@kali.org>
Cc: Srinivas Kandagatla <srini@kernel.org>
Reviewed-by: Abel Vesa <abel.vesa@linaro.org>
Signed-off-by: Alexey Klimov <alexey.klimov@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
Link: https://patch.msgid.link/20251022201013.1740211-1-alexey.klimov@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-slimbus.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'drivers/base')

diff --git a/drivers/base/regmap/regmap-slimbus.c b/drivers/base/regmap/regmap-slimbus.c
index 54eb7d227cf4..e523fae73004 100644
--- a/drivers/base/regmap/regmap-slimbus.c
+++ b/drivers/base/regmap/regmap-slimbus.c
@@ -48,8 +48,7 @@ struct regmap *__regmap_init_slimbus(struct slim_device *slimbus,
 	if (IS_ERR(bus))
 		return ERR_CAST(bus);
 
-	return __regmap_init(&slimbus->dev, bus, &slimbus->dev, config,
-			     lock_key, lock_name);
+	return __regmap_init(&slimbus->dev, bus, slimbus, config, lock_key, lock_name);
 }
 EXPORT_SYMBOL_GPL(__regmap_init_slimbus);
 
@@ -63,8 +62,7 @@ struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus,
 	if (IS_ERR(bus))
 		return ERR_CAST(bus);
 
-	return __devm_regmap_init(&slimbus->dev, bus, &slimbus, config,
-				  lock_key, lock_name);
+	return __devm_regmap_init(&slimbus->dev, bus, slimbus, config, lock_key, lock_name);
 }
 EXPORT_SYMBOL_GPL(__devm_regmap_init_slimbus);
 
-- 
cgit v1.2.3