From f367474b5884edbc42661e7fecf784cb131dd25d Mon Sep 17 00:00:00 2001 From: Brian Mak Date: Tue, 5 Aug 2025 14:15:27 -0700 Subject: x86/kexec: carry forward the boot DTB on kexec Currently, the kexec_file_load syscall on x86 does not support passing a device tree blob to the new kernel. Some embedded x86 systems use device trees. On these systems, failing to pass a device tree to the new kernel causes a boot failure. To add support for this, we copy the behavior of ARM64 and PowerPC and copy the current boot's device tree blob for use in the new kernel. We do this on x86 by passing the device tree blob as a setup_data entry in accordance with the x86 boot protocol. This behavior is gated behind the KEXEC_FILE_FORCE_DTB flag. Link: https://lkml.kernel.org/r/20250805211527.122367-3-makb@juniper.net Signed-off-by: Brian Mak Cc: Alexander Graf Cc: Baoquan He Cc: Borislav Betkov Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Rob Herring Cc: Saravana Kannan Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- include/linux/kexec.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 39fe3e6cd282..ff7e231b0485 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -395,6 +395,9 @@ struct kimage { /* Information for loading purgatory */ struct purgatory_info purgatory_info; + + /* Force carrying over the DTB from the current boot */ + bool force_dtb; #endif #ifdef CONFIG_CRASH_HOTPLUG @@ -461,7 +464,7 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \ - KEXEC_FILE_NO_CMA) + KEXEC_FILE_NO_CMA | KEXEC_FILE_FORCE_DTB) /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; -- cgit v1.2.3 From c8a09fc9664f79eeb66cdf4a2a34d5b6a239b727 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 14 Jul 2025 10:17:09 +0200 Subject: ida: remove the ida_simple_xxx() API All users of the ida_simple_xxx() have been converted. In Linux 6.11-rc2, the only callers are in tools/testing/. So it is now time to remove the definition of this old and deprecated ida_simple_get() and ida_simple_remove(). Link: https://lkml.kernel.org/r/aa205f45fef70a9c948b6a98bad06da58e4de776.1752480043.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/idr.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 2267902d29a7..789e23e67444 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -334,14 +334,6 @@ static inline void ida_init(struct ida *ida) xa_init_flags(&ida->xa, IDA_INIT_FLAGS); } -/* - * ida_simple_get() and ida_simple_remove() are deprecated. Use - * ida_alloc() and ida_free() instead respectively. - */ -#define ida_simple_get(ida, start, end, gfp) \ - ida_alloc_range(ida, start, (end) - 1, gfp) -#define ida_simple_remove(ida, id) ida_free(ida, id) - static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); -- cgit v1.2.3 From baa96bcb180e979a821ce3ade87a3d2349b2d640 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 14 Jul 2025 10:17:10 +0200 Subject: nvmem: update a comment related to struct nvmem_config Update a comment to match the function used in nvmem_register(). ida_simple_get() was replaced by ida_alloc() in commit 1eb51d6a4fce ("nvmem: switch to simpler IDA interface") Link: https://lkml.kernel.org/r/27a9dec93a9f79140b11a77df38b1b45bd342e09.1752480043.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/nvmem-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 615a560d9edb..f3b13da78aac 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -103,7 +103,7 @@ struct nvmem_cell_info { * * Note: A default "nvmem" name will be assigned to the device if * no name is specified in its configuration. In such case "" is - * generated with ida_simple_get() and provided id field is ignored. + * generated with ida_alloc() and provided id field is ignored. * * Note: Specifying name and setting id to -1 implies a unique device * whose name is provided as-is (kept unaltered). -- cgit v1.2.3 From 2a8c51bc9391ff3c701f06ae1b678419f843dc1a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Aug 2025 00:55:07 -0700 Subject: list.h: add missing kernel-doc for basic macros kernel-doc for the basic LIST_HEAD() and LIST_HEAD_INIT() macros has been missing forever (i.e., since git). Add them for completeness. Link: https://lkml.kernel.org/r/20250819075507.113639-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Nicolas Frattaroli Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/list.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index e7e28afd28f8..ca63bdea6c1a 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -20,8 +20,16 @@ * using the generic single-entry routines. */ +/** + * LIST_HEAD_INIT - initialize a &struct list_head's links to point to itself + * @name: name of the list_head + */ #define LIST_HEAD_INIT(name) { &(name), &(name) } +/** + * LIST_HEAD - definition of a &struct list_head with initialization values + * @name: name of the list_head + */ #define LIST_HEAD(name) \ struct list_head name = LIST_HEAD_INIT(name) -- cgit v1.2.3 From 2683df6539cbc3f0eeeba11154bc0cbf042a5cee Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Mon, 25 Aug 2025 10:57:00 +0800 Subject: panic: add note that 'panic_print' parameter is deprecated Just like for 'panic_print's systcl interface, add similar note for setup of kernel cmdline parameter and parameter under /sys/module/kernel/. Also add __core_param_cb() macro, which enables to add special get/set operation for a kernel parameter. Link: https://lkml.kernel.org/r/20250825025701.81921-4-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Reviewed-by: Petr Mladek Cc: Askar Safin Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/moduleparam.h | 13 +++++++++++++ kernel/panic.c | 19 ++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 3a25122d83e2..6907aedc4f74 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -349,6 +349,19 @@ static inline void kernel_param_unlock(struct module *mod) __module_param_call("", name, ¶m_ops_##type, &var, perm, \ -1, KERNEL_PARAM_FL_UNSAFE) +/** + * __core_param_cb - similar like core_param, with a set/get ops instead of type. + * @name: the name of the cmdline and sysfs parameter (often the same as var) + * @var: the variable + * @ops: the set & get operations for this parameter. + * @perm: visibility in sysfs + * + * Ideally this should be called 'core_param_cb', but the name has been + * used for module core parameter, so add the '__' prefix + */ +#define __core_param_cb(name, ops, arg, perm) \ + __module_param_call("", name, ops, arg, perm, -1, 0) + #endif /* !MODULE */ /** diff --git a/kernel/panic.c b/kernel/panic.c index 72fcbb5a071b..12a10e17ab4a 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -937,12 +937,29 @@ EXPORT_SYMBOL(__stack_chk_fail); #endif core_param(panic, panic_timeout, int, 0644); -core_param(panic_print, panic_print, ulong, 0644); core_param(pause_on_oops, pause_on_oops, int, 0644); core_param(panic_on_warn, panic_on_warn, int, 0644); core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644); core_param(panic_console_replay, panic_console_replay, bool, 0644); +static int panic_print_set(const char *val, const struct kernel_param *kp) +{ + pr_info_once("Kernel: 'panic_print' parameter will be obsoleted by both 'panic_sys_info' and 'panic_console_replay'\n"); + return param_set_ulong(val, kp); +} + +static int panic_print_get(char *val, const struct kernel_param *kp) +{ + pr_info_once("Kernel: 'panic_print' parameter will be obsoleted by both 'panic_sys_info' and 'panic_console_replay'\n"); + return param_get_ulong(val, kp); +} + +static const struct kernel_param_ops panic_print_ops = { + .set = panic_print_set, + .get = panic_print_get, +}; +__core_param_cb(panic_print, &panic_print_ops, &panic_print, 0644); + static int __init oops_setup(char *s) { if (!s) -- cgit v1.2.3 From d0d9c7235548f1d772f1e48c9d5742c65d81c705 Mon Sep 17 00:00:00 2001 From: Jinchao Wang Date: Mon, 25 Aug 2025 10:29:29 +0800 Subject: panic: introduce helper functions for panic state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "panic: introduce panic status function family", v2. This series introduces a family of helper functions to manage panic state and updates existing code to use them. Before this series, panic state helpers were scattered and inconsistent. For example, panic_in_progress() was defined in printk/printk.c, not in panic.c or panic.h. As a result, developers had to look in unexpected places to understand or re-use panic state logic. Other checks were open- coded, duplicating logic across panic, crash, and watchdog paths. The new helpers centralize the functionality in panic.c/panic.h: - panic_try_start() - panic_reset() - panic_in_progress() - panic_on_this_cpu() - panic_on_other_cpu() Patches 1–8 add the helpers and convert panic/crash and printk/nbcon code to use them. Patch 9 fixes a bug in the watchdog subsystem by skipping checks when a panic is in progress, avoiding interference with the panic CPU. Together, this makes panic state handling simpler, more discoverable, and more robust. This patch (of 9): This patch introduces four new helper functions to abstract the management of the panic_cpu variable. These functions will be used in subsequent patches to refactor existing code. The direct use of panic_cpu can be error-prone and ambiguous, as it requires manual checks to determine which CPU is handling the panic. The new helpers clarify intent: panic_try_start(): Atomically sets the current CPU as the panicking CPU. panic_reset(): Reset panic_cpu to PANIC_CPU_INVALID. panic_in_progress(): Checks if a panic has been triggered. panic_on_this_cpu(): Returns true if the current CPU is the panic originator. panic_on_other_cpu(): Returns true if a panic is on another CPU. This change lays the groundwork for improved code readability and robustness in the panic handling subsystem. Link: https://lkml.kernel.org/r/20250825022947.1596226-1-wangjinchao600@gmail.com Link: https://lkml.kernel.org/r/20250825022947.1596226-2-wangjinchao600@gmail.com Signed-off-by: Jinchao Wang Cc: Anna Schumaker Cc: Baoquan He Cc: "Darrick J. Wong" Cc: Dave Young Cc: Doug Anderson Cc: "Guilherme G. Piccoli" Cc: Helge Deller Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Joanthan Cameron Cc: Joel Granados Cc: John Ogness Cc: Kees Cook Cc: Li Huafei Cc: "Luck, Tony" Cc: Luo Gengkun Cc: Max Kellermann Cc: Nam Cao Cc: oushixiong Cc: Petr Mladek Cc: Qianqiang Liu Cc: Sergey Senozhatsky Cc: Sohil Mehta Cc: Steven Rostedt Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Zimemrmann Cc: Thorsten Blum Cc: Ville Syrjala Cc: Vivek Goyal Cc: Yicong Yang Cc: Yunhui Cui Cc: Yury Norov (NVIDIA) b Signed-off-by: Andrew Morton --- include/linux/panic.h | 6 ++++++ kernel/panic.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/printk/printk.c | 5 ----- 3 files changed, 59 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/panic.h b/include/linux/panic.h index 7be742628c25..6f972a66c13e 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -43,6 +43,12 @@ void abort(void); extern atomic_t panic_cpu; #define PANIC_CPU_INVALID -1 +bool panic_try_start(void); +void panic_reset(void); +bool panic_in_progress(void); +bool panic_on_this_cpu(void); +bool panic_on_other_cpu(void); + /* * Only to be used by arch init code. If the user over-wrote the default * CONFIG_PANIC_TIMEOUT, honor it. diff --git a/kernel/panic.c b/kernel/panic.c index 24bca263f896..010a1bfc4843 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -299,6 +299,59 @@ void __weak crash_smp_send_stop(void) atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); +bool panic_try_start(void) +{ + int old_cpu, this_cpu; + + /* + * Only one CPU is allowed to execute the crash_kexec() code as with + * panic(). Otherwise parallel calls of panic() and crash_kexec() + * may stop each other. To exclude them, we use panic_cpu here too. + */ + old_cpu = PANIC_CPU_INVALID; + this_cpu = raw_smp_processor_id(); + + return atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu); +} +EXPORT_SYMBOL(panic_try_start); + +void panic_reset(void) +{ + atomic_set(&panic_cpu, PANIC_CPU_INVALID); +} +EXPORT_SYMBOL(panic_reset); + +bool panic_in_progress(void) +{ + return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); +} +EXPORT_SYMBOL(panic_in_progress); + +/* Return true if a panic is in progress on the current CPU. */ +bool panic_on_this_cpu(void) +{ + /* + * We can use raw_smp_processor_id() here because it is impossible for + * the task to be migrated to the panic_cpu, or away from it. If + * panic_cpu has already been set, and we're not currently executing on + * that CPU, then we never will be. + */ + return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id()); +} +EXPORT_SYMBOL(panic_on_this_cpu); + +/* + * Return true if a panic is in progress on a remote CPU. + * + * On true, the local CPU should immediately release any printing resources + * that may be needed by the panic CPU. + */ +bool panic_on_other_cpu(void) +{ + return (panic_in_progress() && !this_cpu_in_panic()); +} +EXPORT_SYMBOL(panic_on_other_cpu); + /* * A variant of panic() called from NMI context. We return if we've already * panicked on this CPU. If another CPU already panicked, loop in diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 0efbcdda9aab..5fe35f377b79 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -345,11 +345,6 @@ static void __up_console_sem(unsigned long ip) } #define up_console_sem() __up_console_sem(_RET_IP_) -static bool panic_in_progress(void) -{ - return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); -} - /* Return true if a panic is in progress on the current CPU. */ bool this_cpu_in_panic(void) { -- cgit v1.2.3 From c6be36e2997662f423edfa3979a63935873ff648 Mon Sep 17 00:00:00 2001 From: Jinchao Wang Date: Mon, 25 Aug 2025 10:29:35 +0800 Subject: panic/printk: replace this_cpu_in_panic() with panic_on_this_cpu() The helper this_cpu_in_panic() duplicated logic already provided by panic_on_this_cpu(). Remove this_cpu_in_panic() and switch all users to panic_on_this_cpu(). This simplifies the code and avoids having two helpers for the same check. Link: https://lkml.kernel.org/r/20250825022947.1596226-8-wangjinchao600@gmail.com Signed-off-by: Jinchao Wang Cc: Anna Schumaker Cc: Baoquan He Cc: "Darrick J. Wong" Cc: Dave Young Cc: Doug Anderson Cc: "Guilherme G. Piccoli" Cc: Helge Deller Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Joanthan Cameron Cc: Joel Granados Cc: John Ogness Cc: Kees Cook Cc: Li Huafei Cc: "Luck, Tony" Cc: Luo Gengkun Cc: Max Kellermann Cc: Nam Cao Cc: oushixiong Cc: Petr Mladek Cc: Qianqiang Liu Cc: Sergey Senozhatsky Cc: Sohil Mehta Cc: Steven Rostedt Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Zimemrmann Cc: Thorsten Blum Cc: Ville Syrjala Cc: Vivek Goyal Cc: Yicong Yang Cc: Yunhui Cui Cc: Yury Norov (NVIDIA) Signed-off-by: Andrew Morton --- include/linux/printk.h | 2 -- kernel/panic.c | 2 +- kernel/printk/nbcon.c | 2 +- kernel/printk/printk.c | 15 ++------------- kernel/printk/printk_ringbuffer.c | 2 +- lib/dump_stack.c | 2 +- 6 files changed, 6 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 5d22b803f51e..45c663124c9b 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -330,8 +330,6 @@ static inline bool pr_flush(int timeout_ms, bool reset_on_progress) #endif -bool this_cpu_in_panic(void); - #ifdef CONFIG_SMP extern int __printk_cpu_sync_try_get(void); extern void __printk_cpu_sync_wait(void); diff --git a/kernel/panic.c b/kernel/panic.c index c4ef86fc643f..a8b1bf60e09f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -348,7 +348,7 @@ EXPORT_SYMBOL(panic_on_this_cpu); */ bool panic_on_other_cpu(void) { - return (panic_in_progress() && !this_cpu_in_panic()); + return (panic_in_progress() && !panic_on_this_cpu()); } EXPORT_SYMBOL(panic_on_other_cpu); diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 7490865e2f44..c6d1a4a747e9 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1394,7 +1394,7 @@ enum nbcon_prio nbcon_get_default_prio(void) { unsigned int *cpu_emergency_nesting; - if (this_cpu_in_panic()) + if (panic_on_this_cpu()) return NBCON_PRIO_PANIC; cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5fe35f377b79..faa8b1f0585b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -17,6 +17,7 @@ * 01Mar01 Andrew Morton */ +#include "linux/panic.h" #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include @@ -345,18 +346,6 @@ static void __up_console_sem(unsigned long ip) } #define up_console_sem() __up_console_sem(_RET_IP_) -/* Return true if a panic is in progress on the current CPU. */ -bool this_cpu_in_panic(void) -{ - /* - * We can use raw_smp_processor_id() here because it is impossible for - * the task to be migrated to the panic_cpu, or away from it. If - * panic_cpu has already been set, and we're not currently executing on - * that CPU, then we never will be. - */ - return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id()); -} - /* * Return true if a panic is in progress on a remote CPU. * @@ -365,7 +354,7 @@ bool this_cpu_in_panic(void) */ bool other_cpu_in_panic(void) { - return (panic_in_progress() && !this_cpu_in_panic()); + return (panic_in_progress() && !panic_on_this_cpu()); } /* diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index d9fb053cff67..e2a1b2d34d2b 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -2143,7 +2143,7 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, * But it would have the sequence number returned * by "prb_next_reserve_seq() - 1". */ - if (this_cpu_in_panic() && + if (panic_on_this_cpu() && (!debug_non_panic_cpus || legacy_allow_panic_sync) && ((*seq + 1) < prb_next_reserve_seq(rb))) { (*seq)++; diff --git a/lib/dump_stack.c b/lib/dump_stack.c index b3a85fe8b673..f0c78b5b5324 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -102,7 +102,7 @@ static void __dump_stack(const char *log_lvl) */ asmlinkage __visible void dump_stack_lvl(const char *log_lvl) { - bool in_panic = this_cpu_in_panic(); + bool in_panic = panic_on_this_cpu(); unsigned long flags; /* -- cgit v1.2.3 From 7b1e502eb17c23fa6459a19bfe5974bffdb95574 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 6 Sep 2025 21:38:57 -0700 Subject: kernel.h: add comments for enum system_states Provide some basic comments about the system_states and what they imply. Also convert the comments to kernel-doc format. Split the enum declaration from the definition of the system_state variable so that kernel-doc notation works cleanly with it. This is picked up by Documentation/driver-api/basics.rst so it does not need further inclusion in the kernel docbooks. Link: https://lkml.kernel.org/r/20250907043857.2941203-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Acked-by: Rafael J. Wysocki # v1 Reviewed-by: Mauro Carvalho Chehab [v5] Cc: "Brown, Len" Cc: Greg Kroah-Hartman Cc: James Bottomley Cc: Jani Nikula Cc: Jonathan Corbet Cc: Mauro Carvalho Chehab Cc: Pavel Machek Signed-off-by: Andrew Morton --- include/linux/kernel.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 989315dabb86..5b46924fdff5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -164,11 +164,23 @@ extern int root_mountflags; extern bool early_boot_irqs_disabled; -/* - * Values used for system_state. Ordering of the states must not be changed +/** + * enum system_states - Values used for system_state. + * + * @SYSTEM_BOOTING: %0, no init needed + * @SYSTEM_SCHEDULING: system is ready for scheduling; OK to use RCU + * @SYSTEM_FREEING_INITMEM: system is freeing all of initmem; almost running + * @SYSTEM_RUNNING: system is up and running + * @SYSTEM_HALT: system entered clean system halt state + * @SYSTEM_POWER_OFF: system entered shutdown/clean power off state + * @SYSTEM_RESTART: system entered emergency power off or normal restart + * @SYSTEM_SUSPEND: system entered suspend or hibernate state + * + * Note: + * Ordering of the states must not be changed * as code checks for <, <=, >, >= STATE. */ -extern enum system_states { +enum system_states { SYSTEM_BOOTING, SYSTEM_SCHEDULING, SYSTEM_FREEING_INITMEM, @@ -177,7 +189,8 @@ extern enum system_states { SYSTEM_POWER_OFF, SYSTEM_RESTART, SYSTEM_SUSPEND, -} system_state; +}; +extern enum system_states system_state; /* * General tracing related utility functions - trace_printk(), -- cgit v1.2.3 From d6d5116391857fc78fad9aa42317b36e4ce17b58 Mon Sep 17 00:00:00 2001 From: Evangelos Petrongonas Date: Thu, 21 Aug 2025 17:58:59 +0000 Subject: kexec: introduce is_kho_boot() Patch series "efi: Fix EFI boot with kexec handover (KHO)", v3. This patch series fixes a kernel panic that occurs when booting with both EFI and KHO (Kexec HandOver) enabled. The issue arises because EFI's `reserve_regions()` clears all memory regions with `memblock_remove(0, PHYS_ADDR_MAX)` before rebuilding them from EFI data. This destroys KHO scratch regions that were set up early during device tree scanning, causing a panic as the kernel has no valid memory regions for early allocations. The first patch introduces `is_kho_boot()` to allow early boot components to reliably detect if the kernel was booted via KHO-enabled kexec. The existing `kho_is_enabled()` only checks the command line and doesn't verify if an actual KHO FDT was passed. The second patch modifies EFI's `reserve_regions()` to selectively remove only non-KHO memory regions when KHO is active, preserving the critical scratch regions while still allowing EFI to rebuild its memory map. This patch (of 3): During early initialisation, after a kexec, other components, like EFI need to know if a KHO enabled kexec is performed. The `kho_is_enabled` function is not enough as in the early stages, it only reflects whether the cmdline has KHO enabled, not if an actual KHO FDT exists. Extend the KHO API with `is_kho_boot()` to provide a way for components to check if a KHO enabled kexec is performed. Link: https://lkml.kernel.org/r/cover.1755721529.git.epetron@amazon.de Link: https://lkml.kernel.org/r/7dc6674a76bf6e68cca0222ccff32427699cc02e.1755721529.git.epetron@amazon.de Signed-off-by: Evangelos Petrongonas Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Ard Biesheuvel Cc: Baoquan He Cc: Changyuan Lyu Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 6 ++++++ kernel/kexec_handover.c | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 348844cffb13..559d13a3bc44 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -40,6 +40,7 @@ struct kho_serialization; #ifdef CONFIG_KEXEC_HANDOVER bool kho_is_enabled(void); +bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); int kho_preserve_phys(phys_addr_t phys, size_t size); @@ -60,6 +61,11 @@ static inline bool kho_is_enabled(void) return false; } +static inline bool is_kho_boot(void) +{ + return false; +} + static inline int kho_preserve_folio(struct folio *folio) { return -EOPNOTSUPP; diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index ecd1ac210dbd..49a39aee6a8e 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -951,6 +951,26 @@ static const void *kho_get_fdt(void) return kho_in.fdt_phys ? phys_to_virt(kho_in.fdt_phys) : NULL; } +/** + * is_kho_boot - check if current kernel was booted via KHO-enabled + * kexec + * + * This function checks if the current kernel was loaded through a kexec + * operation with KHO enabled, by verifying that a valid KHO FDT + * was passed. + * + * Note: This function returns reliable results only after + * kho_populate() has been called during early boot. Before that, + * it may return false even if KHO data is present. + * + * Return: true if booted via KHO-enabled kexec, false otherwise + */ +bool is_kho_boot(void) +{ + return !!kho_get_fdt(); +} +EXPORT_SYMBOL_GPL(is_kho_boot); + /** * kho_retrieve_subtree - retrieve a preserved sub FDT by its name. * @name: the name of the sub FDT passed to kho_add_subtree(). -- cgit v1.2.3 From 39f17c707454290900b608ee5a200b5db9245626 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 14 Sep 2025 13:09:08 +0200 Subject: sched/task.h: fix the wrong comment on task_lock() nesting with tasklist_lock The ancient comment above task_lock() states that it can be nested outside of read_lock(&tasklist_lock), but this is no longer true: CPU_0 CPU_1 CPU_2 task_lock() read_lock(tasklist) write_lock_irq(tasklist) read_lock(tasklist) task_lock() Unless CPU_0 calls read_lock() in IRQ context, queued_read_lock_slowpath() won't get the lock immediately, it will spin waiting for the pending writer on CPU_2, resulting in a deadlock. Link: https://lkml.kernel.org/r/20250914110908.GA18769@redhat.com Signed-off-by: Oleg Nesterov Cc: Christian Brauner Cc: Jiri Slaby Cc: Mateusz Guzik Signed-off-by: Andrew Morton --- include/linux/sched/task.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ea41795a352b..8ff98b18b24b 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -210,9 +210,8 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) * pins the final release of task.io_context. Also protects ->cpuset and * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist. * - * Nests both inside and outside of read_lock(&tasklist_lock). - * It must not be nested with write_lock_irq(&tasklist_lock), - * neither inside nor outside. + * Nests inside of read_lock(&tasklist_lock). It must not be nested with + * write_lock_irq(&tasklist_lock), neither inside nor outside. */ static inline void task_lock(struct task_struct *p) { -- cgit v1.2.3